/*!@file Channels/ObjDetChannel.C object detection channel using OpenCV cascade detector */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
// 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Channels/ObjDetChannel.C $ 00035 // $Id: ObjDetChannel.C 12821 2010-02-11 07:15:07Z itti $ 00036 // 00037 00038 #include "Channels/ObjDetChannel.H" 00039 #include "Image/DrawOps.H" 00040 #include "Image/Kernels.H" // for gaussianBlob() 00041 #include "Image/MathOps.H" 00042 #include "Image/ShapeOps.H" 00043 #include "Image/Transforms.H" 00044 #include "Channels/ChannelOpts.H" 00045 #include "Component/ModelOptionDef.H" 00046 #include "Component/GlobalOpts.H" 00047 00048 static const ModelOptionDef OPT_CascadeFilePath = 00049 { MODOPT_ARG_STRING, "Cascade file path", &MOC_CHANNEL, OPTEXP_CORE, 00050 "Name of directory containing the description of a trained cascade classifier." 00051 "Used in making faces salient or any other object. 
", 00052 "cascade-file", '\0', "<filename>.xml", 00053 //"/usr/local/share/opencv/haarcascades/haarcascade_frontalface_alt2.xml"}; 00054 "/usr/share/opencv/haarcascades/haarcascade_frontalface_alt2.xml"}; 00055 00056 ObjDetChannel::ObjDetChannel(OptionManager& mgr, const std::string & descrName, 00057 const std::string& tagName) : 00058 ChannelBase(mgr, descrName, tagName, FACE), 00059 itsMap(), 00060 itsLevelSpec(&OPT_LevelSpec, this), 00061 itsCascadeFile(&OPT_CascadeFilePath, this), 00062 itsNormType(&OPT_MaxNormType, this), // see Channels/ChannelOpts.{H,C} 00063 itsOutputRangeMin(&OPT_ChannelOutputRangeMin, this), 00064 itsOutputRangeMax(&OPT_ChannelOutputRangeMax, this), 00065 itsUseRandom(&OPT_UseRandom, this), 00066 itsNormalizeOutput("SingleChannelNormalizeOutput", this, false) 00067 { 00068 00069 #ifdef HAVE_OPENCV 00070 cascade = (CvHaarClassifierCascade*)cvLoad( itsCascadeFile.getVal().c_str(), 0, 0, 0 ); 00071 00072 if( !cascade ) 00073 LFATAL("ERROR: Could not load classifier cascade (%s)\n", itsCascadeFile.getVal().c_str() ); 00074 storage = cvCreateMemStorage(0); 00075 #else 00076 LFATAL("OpenCV is needed for ObjDet channel"); 00077 #endif 00078 00079 00080 } 00081 00082 // ###################################################################### 00083 ObjDetChannel::~ObjDetChannel() 00084 { } 00085 00086 // ###################################################################### 00087 bool ObjDetChannel::outputAvailable() const 00088 { return itsMap.initialized(); } 00089 00090 // ###################################################################### 00091 uint ObjDetChannel::numSubmaps() const 00092 { 00093 return 1; 00094 } 00095 00096 // ###################################################################### 00097 Dims ObjDetChannel::getMapDims() const 00098 { 00099 if (!this->hasInput()) 00100 LFATAL("Oops! 
I haven't received any input yet"); 00101 00102 const Dims indims = this->getInputDims(); 00103 00104 return Dims(indims.w() >> itsLevelSpec.getVal().mapLevel(), 00105 indims.h() >> itsLevelSpec.getVal().mapLevel()); 00106 00107 } 00108 00109 // ###################################################################### 00110 void ObjDetChannel::getFeatures(const Point2D<int>& locn, 00111 std::vector<float>& mean) const 00112 { 00113 if (!this->outputAvailable()) 00114 { CLDEBUG("I have no input yet -- RETURNING ZEROS"); mean.push_back(0.0F); return; } 00115 00116 // The coordinates we receive are at the scale of the original 00117 // image, and we will need to rescale them to the size of the 00118 // various submaps we read from. The first image in our first 00119 // pyramid has the dims of the input: 00120 const Dims indims = this->getInputDims(); 00121 mean.push_back(itsMap.getValInterpScaled(locn, indims)); 00122 } 00123 00124 // ###################################################################### 00125 void ObjDetChannel::getFeaturesBatch(std::vector<Point2D<int>*> *locn, 00126 std::vector<std::vector<float> > *mean, 00127 int *count) const 00128 { 00129 if (!this->outputAvailable()) 00130 { 00131 CLDEBUG("I have no input yet -- RETURNING ZEROS"); 00132 std::vector<std::vector<float> >::iterator imean = mean->begin(); 00133 for (int i = 0; i < *count; i++, ++imean) imean->push_back(0.0); 00134 return; 00135 } 00136 00137 // The coordinates we receive are at the scale of the original 00138 // image, and we will need to rescale them to the size of the 00139 // various submaps we read from. 
The first image in our first 00140 // pyramid has the dims of the input: 00141 const Dims indims = this->getInputDims(); 00142 00143 std::vector<Point2D<int>*>::iterator ilocn = locn->begin(); 00144 std::vector<std::vector<float> >::iterator imean = mean->begin(); 00145 00146 for (int i = 0; i < *count; ++i, ++ilocn, ++imean) 00147 imean->push_back(itsMap.getValInterpScaled(**ilocn, indims)); 00148 } 00149 00150 // ###################################################################### 00151 void ObjDetChannel::doInput(const InputFrame& inframe) 00152 { 00153 ASSERT(inframe.grayFloat().initialized()); 00154 Image<byte> lum = inframe.grayFloat(); 00155 00156 #ifdef HAVE_OPENCV 00157 const double scale = 1.3; 00158 IplImage* small_img = 00159 cvCreateImage(cvSize(cvRound(lum.getWidth() / scale), cvRound(lum.getHeight() / scale)), 8, 1 ); 00160 00161 cvResize(img2ipl(lum), small_img, CV_INTER_LINEAR); 00162 cvEqualizeHist(small_img, small_img); 00163 cvClearMemStorage(storage); 00164 00165 if (cascade) 00166 { 00167 double t = double(cvGetTickCount()); 00168 CvSeq* objects = cvHaarDetectObjects(small_img, cascade, storage, 00169 1.1, 2, 0/*CV_HAAR_DO_CANNY_PRUNING*/, 00170 cvSize(30, 30)); 00171 t = double(cvGetTickCount()) - t; 00172 LDEBUG( "detection time = %gms", t / (double(cvGetTickFrequency())*1000.0)); 00173 00174 itsMap = Image<float>(lum.getDims(), ZEROS); 00175 for (int i = 0; i < (objects ? 
objects->total : 0); ++i ) 00176 { 00177 CvRect* r = (CvRect*)cvGetSeqElem(objects, i); 00178 00179 Rectangle objRect(Point2D<int>(int(r->x*scale), int(r->y*scale)), 00180 Dims(int(r->width*scale), int(r->height*scale))); 00181 00182 const Point2D<int> objCenter = Point2D<int>(objRect.topLeft().i + objRect.width()/2, 00183 objRect.topLeft().j + objRect.height()/2); 00184 00185 Image<float> objBlob = 00186 gaussianBlobUnnormalized<float>(lum.getDims(), objCenter, 00187 float(objRect.width())/2, float(objRect.height())/2); 00188 00189 itsMap += objBlob; 00190 } 00191 } 00192 00193 inplaceRectify(itsMap); // eliminate any possible negative values (from rounding) 00194 itsMap = rescale(itsMap, this->getMapDims()); // scale to final dims 00195 00196 // normalize range and add background noise if desired: 00197 inplaceNormalize(itsMap, 0.0F, 255.0F); 00198 if (itsUseRandom.getVal()) inplaceAddBGnoise(itsMap, 255.0F); 00199 00200 // apply max-normalization on the output as needed: 00201 if (itsNormalizeOutput.getVal()) 00202 { 00203 LDEBUG("%s: Normalizing output: %s(%f .. 
%f)", tagName().c_str(), 00204 maxNormTypeName(itsNormType.getVal()), itsOutputRangeMin.getVal(), 00205 itsOutputRangeMax.getVal()); 00206 00207 itsMap = maxNormalize(itsMap, itsOutputRangeMin.getVal(), 00208 itsOutputRangeMax.getVal(), itsNormType.getVal()); 00209 } 00210 00211 cvReleaseImage(&small_img); 00212 #endif 00213 } 00214 00215 // ###################################################################### 00216 Image<float> ObjDetChannel::getSubmap(const uint index) const 00217 { 00218 if (index != 0) 00219 LFATAL("got submap index = %u, but I have only one submap", index); 00220 00221 return itsMap; 00222 } 00223 00224 // ###################################################################### 00225 std::string ObjDetChannel::getSubmapName(const uint index) const 00226 { 00227 return "ObjDet"; 00228 } 00229 00230 // ###################################################################### 00231 std::string ObjDetChannel::getSubmapNameShort(const uint index) const 00232 { 00233 return "ObjDet"; 00234 } 00235 00236 00237 // ###################################################################### 00238 Image<float> ObjDetChannel::getOutput() 00239 { return itsMap; } 00240 00241 00242 // ###################################################################### 00243 /* So things look consistent in everyone's emacs... */ 00244 /* Local Variables: */ 00245 /* indent-tabs-mode: nil */ 00246 /* End: */