HOG.C

Go to the documentation of this file.
00001 /*!@file Features/HOG.C */
00002 
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00006 // by the University of Southern California (USC) and the iLab at USC.  //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Lior Elazary
00035 // $HeadURL$
00036 // $Id$
00037 //
00038 
00039 #include "Features/HOG.H"
00040 #include "Image/DrawOps.H"
00041 #include "Image/MathOps.H"
00042 #include "Image/Kernels.H"
00043 #include "Image/CutPaste.H"
00044 #include "Image/ColorOps.H"
00045 #include "Image/FilterOps.H"
00046 #include "Image/ShapeOps.H"
00047 #include "SIFT/FeatureVector.H"
00048 #include "GUI/DebugWin.H"
00049 
00050 #include <stdio.h>
00051 
00052 HOG::HOG()
00053 {
00054 }
00055 
00056 HOG::~HOG()
00057 {
00058 }
00059 
00060 ImageSet<float> HOG::getFeatures(const Image<PixRGB<byte> >& img, int numBins)
00061 {
00062   int itsNumOrientations = 18;
00063 
00064   Image<float> mag, ori;
00065   getMaxGradient(img, mag, ori, itsNumOrientations);
00066 
00067   ImageSet<float> histogram = getOriHistogram(mag, ori, itsNumOrientations, numBins);
00068 
00069   ImageSet<double> features = computeFeatures(histogram);
00070 
00071   return features;
00072 }
00073 
00074 Image<PixRGB<byte> > HOG::getHistogramImage(const ImageSet<float>& hist, const int lineSize)
00075 {
00076   if (hist.size() == 0)
00077     return Image<PixRGB<byte> >();
00078 
00079 
00080   Image<float> img(hist[0].getDims()*lineSize, ZEROS);
00081   //Create a one histogram with the maximum features for the 9 orientations
00082   //TODO: features need to be separated
00083   for(uint feature=0; feature<9; feature++)
00084   {
00085     float ori = (float)feature/M_PI + (M_PI/2);
00086     for(int y=0; y<hist[feature].getHeight(); y++)
00087     {
00088       for(int x=0; x<hist[feature].getWidth(); x++)
00089       {
00090         float histVal = hist[feature].getVal(x,y);
00091 
00092         //TODO: is this redundant since the first 9 features are
00093         //contained in the signed 18 features?
00094         if (hist[feature+9].getVal(x,y) > histVal)
00095           histVal = hist[feature+9].getVal(x,y);
00096         if (hist[feature+18].getVal(x,y) > histVal)
00097           histVal = hist[feature+18].getVal(x,y);
00098         if (histVal < 0) histVal = 0; //TODO: do we want this?
00099 
00100         drawLine(img, Point2D<int>((lineSize/2) + x*lineSize,
00101                                    (lineSize/2) + y*lineSize),
00102                                    -ori, lineSize,
00103                                    histVal);
00104       }
00105     }
00106   }
00107 
00108   inplaceNormalize(img, 0.0F, 255.0F);
00109 
00110   return toRGB(img);
00111 }
00112 
00113 ImageSet<double> HOG::computeFeatures(const ImageSet<float>& hist)
00114 {
00115 
00116   // compute energy in each block by summing over orientations
00117   Image<double> norm = getHistogramEnergy(hist);
00118 
00119   const int w = norm.getWidth();
00120   const int h = norm.getHeight();
00121   
00122   const int numFeatures = hist.size() +   //Contrast-sensitive features
00123                     hist.size()/2 + //contrast-insensitive features
00124                     4 +             //texture features
00125                     1;              //trancation feature (this is zero map???)
00126 
00127   const int featuresW = std::max(w-2, 0);
00128   const int featuresH = std::max(h-2, 0);
00129 
00130   ImageSet<double> features(numFeatures, Dims(featuresW, featuresH), ZEROS);
00131 
00132   Image<double>::const_iterator normPtr = norm.begin();
00133   Image<double>::const_iterator ptr;
00134 
00135   // small value, used to avoid division by zero
00136   const double eps = 0.0001;
00137 
00138   for(int y=0; y<featuresH; y++)
00139     for(int x=0; x<featuresW; x++)
00140     {
00141 
00142       //Combine the norm values of neighboring bins
00143       ptr = normPtr + (y+1)*w + x+1;
00144       const double n1 = 1.0 / sqrt(*ptr + *(ptr+1) +
00145                                    *(ptr+w) + *(ptr+w+1) +
00146                                    eps);
00147       ptr = normPtr + y*w + x+1;
00148       const double n2 = 1.0 / sqrt(*ptr + *(ptr+1) +
00149                                    *(ptr+w) + *(ptr+w+1) +
00150                                    eps);
00151       ptr = normPtr + (y+1)*w + x;
00152       const double n3 = 1.0 / sqrt(*ptr + *(ptr+1) +
00153                                    *(ptr+w) + *(ptr+w+1) +
00154                                    eps);
00155       ptr = normPtr + y*w + x;      
00156       const double n4 = 1.0 / sqrt(*ptr + *(ptr+1) +
00157                                    *(ptr+w) + *(ptr+w+1) +
00158                                    eps);
00159 
00160       //For texture features
00161       double t1 = 0, t2 = 0, t3 = 0, t4 = 0;
00162 
00163       // contrast-sensitive features
00164       uint featureId = 0;
00165       for(uint ori=0; ori < hist.size(); ori++)
00166       {
00167         Image<float>::const_iterator histPtr = hist[ori].begin();
00168         const float histVal = histPtr[(y+1)*w + x+1];
00169         double h1 = std::min(histVal * n1, 0.2);
00170         double h2 = std::min(histVal * n2, 0.2);
00171         double h3 = std::min(histVal * n3, 0.2);
00172         double h4 = std::min(histVal * n4, 0.2);
00173 
00174         t1 += h1; t2 += h2; t3 += h3; t4 += h4;
00175 
00176         Image<double>::iterator featuresPtr = features[featureId++].beginw();
00177         featuresPtr[y*featuresW + x] = 0.5 * (h1 + h2 + h3 + h4);
00178       }
00179 
00180       // contrast-insensitive features
00181       int halfOriSize = hist.size()/2;
00182       for(int ori=0; ori < halfOriSize; ori++)
00183       {
00184         Image<float>::const_iterator histPtr1 = hist[ori].begin();
00185         Image<float>::const_iterator histPtr2 = hist[ori+halfOriSize].begin();
00186         const double sum = histPtr1[(y+1)*w + x+1] + histPtr2[(y+1)*w + x+1];
00187         double h1 = std::min(sum * n1, 0.2);
00188         double h2 = std::min(sum * n2, 0.2);
00189         double h3 = std::min(sum * n3, 0.2);
00190         double h4 = std::min(sum * n4, 0.2);
00191 
00192         Image<double>::iterator featuresPtr = features[featureId++].beginw();
00193         featuresPtr[y*featuresW + x] = 0.5 * (h1 + h2 + h3 + h4);
00194       }
00195 
00196       // texture features
00197       Image<double>::iterator featuresPtr = features[featureId++].beginw();
00198       featuresPtr[y*featuresW + x] = 0.2357 * t1;
00199 
00200       featuresPtr = features[featureId++].beginw();
00201       featuresPtr[y*featuresW + x] = 0.2357 * t2;
00202       
00203       featuresPtr = features[featureId++].beginw();
00204       featuresPtr[y*featuresW + x] = 0.2357 * t3;
00205 
00206       featuresPtr = features[featureId++].beginw();
00207       featuresPtr[y*featuresW + x] = 0.2357 * t4;
00208 
00209       // truncation feature
00210       // This seems to be just 0, do we need it?
00211       featuresPtr = features[featureId++].beginw();
00212       featuresPtr[y*featuresW + x] = 0;
00213 
00214     }
00215 
00216 
00217 
00218   return features;
00219 
00220 }
00221 
00222 Image<double> HOG::getHistogramEnergy(const ImageSet<float>& hist)
00223 {
00224   if (hist.size() == 0)
00225     return Image<double>();
00226 
00227   Image<double> norm(hist[0].getDims(), ZEROS);
00228 
00229   //TODO: check for overflow
00230   int halfOriSize = hist.size()/2;
00231   // compute energy in each block by summing over orientations
00232   for(int ori=0; ori<halfOriSize; ori++)
00233   {
00234     Image<float>::const_iterator src1Ptr = hist[ori].begin();
00235     Image<float>::const_iterator src2Ptr = hist[ori+halfOriSize].begin();
00236 
00237     Image<double>::iterator normPtr = norm.beginw();
00238     Image<double>::const_iterator normPtrEnd = norm.end();
00239 
00240     while(normPtr < normPtrEnd)
00241     {
00242       *(normPtr++) += (*src1Ptr + *src2Ptr) * (*src1Ptr + *src2Ptr);
00243       src1Ptr++;
00244       src2Ptr++;
00245     }
00246   }
00247 
00248   return norm;
00249 }
00250 
00251 ImageSet<float> HOG::getOriHistogram(const Image<float>& mag, const Image<float>& ori, int numOrientations, int numBins)
00252 {
00253   Dims blocksDims = Dims(
00254       (int)round((double)mag.getWidth()/double(numBins)),
00255       (int)round((double)mag.getHeight()/double(numBins)));
00256 
00257   ImageSet<float> hist(numOrientations, blocksDims, ZEROS);
00258 
00259   Image<float>::const_iterator magPtr = mag.begin(), oriPtr = ori.begin();
00260   //Set the with an height to a whole bin numbers. 
00261   //If needed replicate the data when summing the bins
00262   int w = blocksDims.w()*numBins; 
00263   int h = blocksDims.h()*numBins;
00264   int magW = mag.getWidth(); 
00265   int magH = mag.getHeight();
00266   int histWidth = blocksDims.w(); 
00267   int histHeight = blocksDims.h();
00268 
00269   for (int y = 1; y < h-1; y ++)
00270     for (int x = 1; x < w-1; x ++)
00271     {
00272       // add to 4 histograms around pixel using linear interpolation
00273       double xp = ((double)x+0.5)/(double)numBins - 0.5;
00274       double yp = ((double)y+0.5)/(double)numBins - 0.5;
00275       int ixp = (int)floor(xp);
00276       int iyp = (int)floor(yp);
00277       double vx0 = xp-ixp;
00278       double vy0 = yp-iyp;
00279       double vx1 = 1.0-vx0;
00280       double vy1 = 1.0-vy0;
00281       
00282 
00283       //If we are outside out mag/ori data, then use the last values in it
00284       int magX = std::min(x, magW-2);
00285       int magY = std::min(y, magH-2);
00286       double mag = magPtr[magY*magW  + magX];
00287       int ori = int(oriPtr[magY*magW + magX]);
00288 
00289       Image<float>::iterator histPtr = hist[ori].beginw();
00290 
00291       if (ixp >= 0 && iyp >= 0)
00292         histPtr[iyp*histWidth + ixp] += vx1*vy1*mag;
00293 
00294       if (ixp+1 < histWidth && iyp >= 0)
00295         histPtr[iyp*histWidth + ixp+1] += vx0*vy1*mag;
00296 
00297       if (ixp >= 0 && iyp+1 < histHeight) 
00298         histPtr[(iyp+1)*histWidth + ixp] += vx1*vy0*mag;
00299 
00300       if (ixp+1 < histWidth && iyp+1 < histHeight) 
00301         histPtr[(iyp+1)*histWidth + ixp+1] += vx0*vy0*mag;
00302     }
00303 
00304   return hist;
00305 }
00306 
00307 void HOG::getMaxGradient(const Image<PixRGB<byte> >& img,
00308     Image<float>& mag, Image<float>& ori,
00309     int numOrientations)
00310 {
00311   if (numOrientations != 0 &&
00312       numOrientations > 18)
00313     LFATAL("Can only support up to 18 orientations for now.");
00314   
00315   mag.resize(img.getDims()); ori.resize(img.getDims());
00316 
00317   Image<PixRGB<byte> >::const_iterator src = img.begin();
00318   Image<float>::iterator mPtr = mag.beginw(), oPtr = ori.beginw();
00319   const int w = mag.getWidth(), h = mag.getHeight();
00320 
00321   float zero = 0;
00322 
00323   // first row is all zeros:
00324   for (int i = 0; i < w; i ++) { *mPtr ++ = zero; *oPtr ++ = zero; }
00325   src += w;
00326 
00327   // loop over inner rows:
00328   for (int j = 1; j < h-1; j ++)
00329     {
00330       // leftmost pixel is zero:
00331       *mPtr ++ = zero; *oPtr ++ = zero; ++ src;
00332 
00333       // loop over inner columns:
00334       for (int i = 1; i < w-1; i ++)
00335         {
00336           PixRGB<int> valx = src[1] - src[-1];
00337           PixRGB<int> valy = src[w] - src[-w];
00338 
00339           //Mag
00340           double mag1 = (valx.red()*valx.red()) + (valy.red()*valy.red());
00341           double mag2 = (valx.green()*valx.green()) + (valy.green()*valy.green());
00342           double mag3 = (valx.blue()*valx.blue()) + (valy.blue()*valy.blue());
00343 
00344           double mag = mag1;
00345           double dx = valx.red();
00346           double dy = valy.red();
00347 
00348           //Get the channel with the strongest gradient
00349           if (mag2 > mag)
00350           {
00351             dx = valx.green();
00352             dy = valy.green();
00353             mag = mag2;
00354           }
00355           if (mag3 > mag)
00356           {
00357             dx = valx.blue();
00358             dy = valy.blue();
00359             mag = mag3;
00360           }
00361 
00362           *mPtr++ = sqrt(mag);
00363           if (numOrientations > 0)
00364           {
00365             //Snap to num orientations
00366             double bestDot = 0;
00367             int bestOri = 0;
00368             for (int ori = 0; ori < numOrientations/2; ori++) {
00369               double dot = itsUU[ori]*dx + itsVV[ori]*dy;
00370               if (dot > bestDot) {
00371                 bestDot = dot;
00372                 bestOri = ori;
00373               } else if (-dot > bestDot) {
00374                 bestDot = -dot;
00375                 bestOri = ori+(numOrientations/2);
00376               }
00377             }
00378             *oPtr++ = bestOri;
00379 
00380           } else {
00381             *oPtr++ = atan2(dy, dx);
00382           }
00383           ++ src;
00384         }
00385 
00386       // rightmost pixel is zero:
00387       *mPtr ++ = zero; *oPtr ++ = zero; ++ src;
00388     }
00389 
00390   // last row is all zeros:
00391   for (int i = 0; i < w; i ++) { *mPtr ++ = zero; *oPtr ++ = zero; }
00392 }
00393 
00394 // ######################################################################
00395 /* So things look consistent in everyone's emacs... */
00396 /* Local Variables: */
00397 /* indent-tabs-mode: nil */
00398 /* End: */