ObjDetChannel.C

Go to the documentation of this file.
00001 /*!@file Channels/ObjDetChannel.C Object detection channel using the OpenCV cascade detector */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Channels/ObjDetChannel.C $
00035 // $Id: ObjDetChannel.C 12821 2010-02-11 07:15:07Z itti $
00036 //
00037 
00038 #include "Channels/ObjDetChannel.H"
00039 #include "Image/DrawOps.H"
00040 #include "Image/Kernels.H"  // for gaussianBlob()
00041 #include "Image/MathOps.H"
00042 #include "Image/ShapeOps.H"
00043 #include "Image/Transforms.H"
00044 #include "Channels/ChannelOpts.H"
00045 #include "Component/ModelOptionDef.H"
00046 #include "Component/GlobalOpts.H"
00047 
00048 static const ModelOptionDef OPT_CascadeFilePath =
00049   { MODOPT_ARG_STRING, "Cascade file path", &MOC_CHANNEL, OPTEXP_CORE,
00050     "Name of directory containing the description of a trained cascade classifier."
00051     "Used in making faces salient or any other object.  ",
00052     "cascade-file", '\0', "<filename>.xml",
00053     //"/usr/local/share/opencv/haarcascades/haarcascade_frontalface_alt2.xml"};
00054     "/usr/share/opencv/haarcascades/haarcascade_frontalface_alt2.xml"};
00055 
00056 ObjDetChannel::ObjDetChannel(OptionManager& mgr, const std::string & descrName,
00057                              const std::string& tagName) :
00058   ChannelBase(mgr, descrName, tagName, FACE),
00059   itsMap(),
00060   itsLevelSpec(&OPT_LevelSpec, this),
00061   itsCascadeFile(&OPT_CascadeFilePath, this),
00062   itsNormType(&OPT_MaxNormType, this), // see Channels/ChannelOpts.{H,C}
00063   itsOutputRangeMin(&OPT_ChannelOutputRangeMin, this),
00064   itsOutputRangeMax(&OPT_ChannelOutputRangeMax, this),
00065   itsUseRandom(&OPT_UseRandom, this),
00066   itsNormalizeOutput("SingleChannelNormalizeOutput", this, false)
00067 {
00068 
00069 #ifdef HAVE_OPENCV
00070   cascade = (CvHaarClassifierCascade*)cvLoad( itsCascadeFile.getVal().c_str(), 0, 0, 0 );
00071 
00072   if( !cascade )
00073     LFATAL("ERROR: Could not load classifier cascade (%s)\n", itsCascadeFile.getVal().c_str() );
00074   storage = cvCreateMemStorage(0);
00075 #else
00076   LFATAL("OpenCV is needed for ObjDet channel");
00077 #endif
00078 
00079 
00080 }
00081 
00082 // ######################################################################
00083 ObjDetChannel::~ObjDetChannel()
00084 {  }
00085 
00086 // ######################################################################
00087 bool ObjDetChannel::outputAvailable() const
00088 { return itsMap.initialized(); }
00089 
00090 // ######################################################################
00091 uint ObjDetChannel::numSubmaps() const
00092 {
00093   return 1;
00094 }
00095 
00096 // ######################################################################
00097 Dims ObjDetChannel::getMapDims() const
00098 {
00099   if (!this->hasInput())
00100     LFATAL("Oops! I haven't received any input yet");
00101 
00102   const Dims indims = this->getInputDims();
00103 
00104   return Dims(indims.w() >> itsLevelSpec.getVal().mapLevel(),
00105               indims.h() >> itsLevelSpec.getVal().mapLevel());
00106 
00107 }
00108 
00109 // ######################################################################
00110 void ObjDetChannel::getFeatures(const Point2D<int>& locn,
00111                               std::vector<float>& mean) const
00112 {
00113   if (!this->outputAvailable())
00114     { CLDEBUG("I have no input yet -- RETURNING ZEROS"); mean.push_back(0.0F); return; }
00115 
00116   // The coordinates we receive are at the scale of the original
00117   // image, and we will need to rescale them to the size of the
00118   // various submaps we read from. The first image in our first
00119   // pyramid has the dims of the input:
00120   const Dims indims = this->getInputDims();
00121   mean.push_back(itsMap.getValInterpScaled(locn, indims));
00122 }
00123 
00124 // ######################################################################
00125 void ObjDetChannel::getFeaturesBatch(std::vector<Point2D<int>*> *locn,
00126                                      std::vector<std::vector<float> > *mean,
00127                                      int *count) const
00128 {
00129   if (!this->outputAvailable())
00130     {
00131       CLDEBUG("I have no input yet -- RETURNING ZEROS");
00132       std::vector<std::vector<float> >::iterator imean = mean->begin();
00133       for (int i = 0; i < *count; i++, ++imean) imean->push_back(0.0);
00134       return;
00135     }
00136 
00137   // The coordinates we receive are at the scale of the original
00138   // image, and we will need to rescale them to the size of the
00139   // various submaps we read from. The first image in our first
00140   // pyramid has the dims of the input:
00141   const Dims indims = this->getInputDims();
00142 
00143   std::vector<Point2D<int>*>::iterator ilocn = locn->begin();
00144   std::vector<std::vector<float> >::iterator imean = mean->begin();
00145 
00146   for (int i = 0; i < *count; ++i, ++ilocn, ++imean)
00147     imean->push_back(itsMap.getValInterpScaled(**ilocn, indims));
00148 }
00149 
00150 // ######################################################################
00151 void ObjDetChannel::doInput(const InputFrame& inframe)
00152 {
00153   ASSERT(inframe.grayFloat().initialized());
00154   Image<byte> lum = inframe.grayFloat();
00155 
00156 #ifdef HAVE_OPENCV
00157   const double scale = 1.3;
00158   IplImage* small_img =
00159     cvCreateImage(cvSize(cvRound(lum.getWidth() / scale), cvRound(lum.getHeight() / scale)), 8, 1 );
00160 
00161   cvResize(img2ipl(lum), small_img, CV_INTER_LINEAR);
00162   cvEqualizeHist(small_img, small_img);
00163   cvClearMemStorage(storage);
00164 
00165   if (cascade)
00166     {
00167       double t = double(cvGetTickCount());
00168       CvSeq* objects = cvHaarDetectObjects(small_img, cascade, storage,
00169                                            1.1, 2, 0/*CV_HAAR_DO_CANNY_PRUNING*/,
00170                                            cvSize(30, 30));
00171       t = double(cvGetTickCount()) - t;
00172       LDEBUG( "detection time = %gms", t / (double(cvGetTickFrequency())*1000.0));
00173 
00174       itsMap = Image<float>(lum.getDims(), ZEROS);
00175       for (int i = 0; i < (objects ? objects->total : 0); ++i )
00176         {
00177           CvRect* r = (CvRect*)cvGetSeqElem(objects, i);
00178 
00179           Rectangle objRect(Point2D<int>(int(r->x*scale), int(r->y*scale)),
00180                             Dims(int(r->width*scale), int(r->height*scale)));
00181 
00182           const Point2D<int> objCenter = Point2D<int>(objRect.topLeft().i + objRect.width()/2,
00183                                                       objRect.topLeft().j + objRect.height()/2);
00184 
00185           Image<float> objBlob =
00186             gaussianBlobUnnormalized<float>(lum.getDims(), objCenter,
00187                                             float(objRect.width())/2, float(objRect.height())/2);
00188 
00189           itsMap += objBlob;
00190         }
00191     }
00192 
00193   inplaceRectify(itsMap); // eliminate any possible negative values (from rounding)
00194   itsMap = rescale(itsMap, this->getMapDims()); // scale to final dims
00195 
00196   // normalize range and add background noise if desired:
00197   inplaceNormalize(itsMap, 0.0F, 255.0F);
00198   if (itsUseRandom.getVal()) inplaceAddBGnoise(itsMap, 255.0F);
00199 
00200   // apply max-normalization on the output as needed:
00201   if (itsNormalizeOutput.getVal())
00202     {
00203       LDEBUG("%s: Normalizing output: %s(%f .. %f)", tagName().c_str(),
00204              maxNormTypeName(itsNormType.getVal()), itsOutputRangeMin.getVal(),
00205              itsOutputRangeMax.getVal());
00206 
00207             itsMap = maxNormalize(itsMap, itsOutputRangeMin.getVal(),
00208                            itsOutputRangeMax.getVal(), itsNormType.getVal());
00209     }
00210 
00211   cvReleaseImage(&small_img);
00212 #endif
00213 }
00214 
00215 // ######################################################################
00216 Image<float> ObjDetChannel::getSubmap(const uint index) const
00217 {
00218   if (index != 0)
00219     LFATAL("got submap index = %u, but I have only one submap", index);
00220 
00221   return itsMap;
00222 }
00223 
00224 // ######################################################################
00225 std::string ObjDetChannel::getSubmapName(const uint index) const
00226 {
00227   return "ObjDet";
00228 }
00229 
00230 // ######################################################################
00231 std::string ObjDetChannel::getSubmapNameShort(const uint index) const
00232 {
00233   return "ObjDet";
00234 }
00235 
00236 
00237 // ######################################################################
00238 Image<float> ObjDetChannel::getOutput()
00239 { return itsMap; }
00240 
00241 
00242 // ######################################################################
00243 /* So things look consistent in everyone's emacs... */
00244 /* Local Variables: */
00245 /* indent-tabs-mode: nil */
00246 /* End: */