00001 /*!@file Features/HOG.C */ 00002 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00006 // by the University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Lior Elazary 00035 // $HeadURL$ 00036 // $Id$ 00037 // 00038 00039 #include "Features/HOG.H" 00040 #include "Image/DrawOps.H" 00041 #include "Image/MathOps.H" 00042 #include "Image/Kernels.H" 00043 #include "Image/CutPaste.H" 00044 #include "Image/ColorOps.H" 00045 #include "Image/FilterOps.H" 00046 #include "Image/ShapeOps.H" 00047 #include "SIFT/FeatureVector.H" 00048 #include "GUI/DebugWin.H" 00049 00050 #include <stdio.h> 00051 00052 HOG::HOG() 00053 { 00054 } 00055 00056 HOG::~HOG() 00057 { 00058 } 00059 00060 ImageSet<float> HOG::getFeatures(const Image<PixRGB<byte> >& img, int numBins) 00061 { 00062 int itsNumOrientations = 18; 00063 00064 Image<float> mag, ori; 00065 getMaxGradient(img, mag, ori, itsNumOrientations); 00066 00067 ImageSet<float> histogram = getOriHistogram(mag, ori, itsNumOrientations, numBins); 00068 00069 ImageSet<double> features = computeFeatures(histogram); 00070 00071 return features; 00072 } 00073 00074 Image<PixRGB<byte> > HOG::getHistogramImage(const ImageSet<float>& hist, const int lineSize) 00075 { 00076 if (hist.size() == 0) 00077 return Image<PixRGB<byte> >(); 00078 00079 00080 Image<float> img(hist[0].getDims()*lineSize, ZEROS); 00081 //Create a one histogram with the maximum features for the 9 orientations 00082 //TODO: features need to be separated 00083 for(uint feature=0; feature<9; feature++) 00084 { 00085 float ori = (float)feature/M_PI + (M_PI/2); 00086 for(int y=0; y<hist[feature].getHeight(); y++) 00087 { 00088 for(int x=0; x<hist[feature].getWidth(); x++) 00089 { 00090 float histVal = hist[feature].getVal(x,y); 00091 00092 //TODO: is this redundant since the first 9 features are 00093 //contained in the signed 18 features? 00094 if (hist[feature+9].getVal(x,y) > histVal) 00095 histVal = hist[feature+9].getVal(x,y); 00096 if (hist[feature+18].getVal(x,y) > histVal) 00097 histVal = hist[feature+18].getVal(x,y); 00098 if (histVal < 0) histVal = 0; //TODO: do we want this? 00099 00100 drawLine(img, Point2D<int>((lineSize/2) + x*lineSize, 00101 (lineSize/2) + y*lineSize), 00102 -ori, lineSize, 00103 histVal); 00104 } 00105 } 00106 } 00107 00108 inplaceNormalize(img, 0.0F, 255.0F); 00109 00110 return toRGB(img); 00111 } 00112 00113 ImageSet<double> HOG::computeFeatures(const ImageSet<float>& hist) 00114 { 00115 00116 // compute energy in each block by summing over orientations 00117 Image<double> norm = getHistogramEnergy(hist); 00118 00119 const int w = norm.getWidth(); 00120 const int h = norm.getHeight(); 00121 00122 const int numFeatures = hist.size() + //Contrast-sensitive features 00123 hist.size()/2 + //contrast-insensitive features 00124 4 + //texture features 00125 1; //trancation feature (this is zero map???) 00126 00127 const int featuresW = std::max(w-2, 0); 00128 const int featuresH = std::max(h-2, 0); 00129 00130 ImageSet<double> features(numFeatures, Dims(featuresW, featuresH), ZEROS); 00131 00132 Image<double>::const_iterator normPtr = norm.begin(); 00133 Image<double>::const_iterator ptr; 00134 00135 // small value, used to avoid division by zero 00136 const double eps = 0.0001; 00137 00138 for(int y=0; y<featuresH; y++) 00139 for(int x=0; x<featuresW; x++) 00140 { 00141 00142 //Combine the norm values of neighboring bins 00143 ptr = normPtr + (y+1)*w + x+1; 00144 const double n1 = 1.0 / sqrt(*ptr + *(ptr+1) + 00145 *(ptr+w) + *(ptr+w+1) + 00146 eps); 00147 ptr = normPtr + y*w + x+1; 00148 const double n2 = 1.0 / sqrt(*ptr + *(ptr+1) + 00149 *(ptr+w) + *(ptr+w+1) + 00150 eps); 00151 ptr = normPtr + (y+1)*w + x; 00152 const double n3 = 1.0 / sqrt(*ptr + *(ptr+1) + 00153 *(ptr+w) + *(ptr+w+1) + 00154 eps); 00155 ptr = normPtr + y*w + x; 00156 const double n4 = 1.0 / sqrt(*ptr + *(ptr+1) + 00157 *(ptr+w) + *(ptr+w+1) + 00158 eps); 00159 00160 //For texture features 00161 double t1 = 0, t2 = 0, t3 = 0, t4 = 0; 00162 00163 // contrast-sensitive features 00164 uint featureId = 0; 00165 for(uint ori=0; ori < hist.size(); ori++) 00166 { 00167 Image<float>::const_iterator histPtr = hist[ori].begin(); 00168 const float histVal = histPtr[(y+1)*w + x+1]; 00169 double h1 = std::min(histVal * n1, 0.2); 00170 double h2 = std::min(histVal * n2, 0.2); 00171 double h3 = std::min(histVal * n3, 0.2); 00172 double h4 = std::min(histVal * n4, 0.2); 00173 00174 t1 += h1; t2 += h2; t3 += h3; t4 += h4; 00175 00176 Image<double>::iterator featuresPtr = features[featureId++].beginw(); 00177 featuresPtr[y*featuresW + x] = 0.5 * (h1 + h2 + h3 + h4); 00178 } 00179 00180 // contrast-insensitive features 00181 int halfOriSize = hist.size()/2; 00182 for(int ori=0; ori < halfOriSize; ori++) 00183 { 00184 Image<float>::const_iterator histPtr1 = hist[ori].begin(); 00185 Image<float>::const_iterator histPtr2 = hist[ori+halfOriSize].begin(); 00186 const double sum = histPtr1[(y+1)*w + x+1] + histPtr2[(y+1)*w + x+1]; 00187 double h1 = std::min(sum * n1, 0.2); 00188 double h2 = std::min(sum * n2, 0.2); 00189 double h3 = std::min(sum * n3, 0.2); 00190 double h4 = std::min(sum * n4, 0.2); 00191 00192 Image<double>::iterator featuresPtr = features[featureId++].beginw(); 00193 featuresPtr[y*featuresW + x] = 0.5 * (h1 + h2 + h3 + h4); 00194 } 00195 00196 // texture features 00197 Image<double>::iterator featuresPtr = features[featureId++].beginw(); 00198 featuresPtr[y*featuresW + x] = 0.2357 * t1; 00199 00200 featuresPtr = features[featureId++].beginw(); 00201 featuresPtr[y*featuresW + x] = 0.2357 * t2; 00202 00203 featuresPtr = features[featureId++].beginw(); 00204 featuresPtr[y*featuresW + x] = 0.2357 * t3; 00205 00206 featuresPtr = features[featureId++].beginw(); 00207 featuresPtr[y*featuresW + x] = 0.2357 * t4; 00208 00209 // truncation feature 00210 // This seems to be just 0, do we need it? 00211 featuresPtr = features[featureId++].beginw(); 00212 featuresPtr[y*featuresW + x] = 0; 00213 00214 } 00215 00216 00217 00218 return features; 00219 00220 } 00221 00222 Image<double> HOG::getHistogramEnergy(const ImageSet<float>& hist) 00223 { 00224 if (hist.size() == 0) 00225 return Image<double>(); 00226 00227 Image<double> norm(hist[0].getDims(), ZEROS); 00228 00229 //TODO: check for overflow 00230 int halfOriSize = hist.size()/2; 00231 // compute energy in each block by summing over orientations 00232 for(int ori=0; ori<halfOriSize; ori++) 00233 { 00234 Image<float>::const_iterator src1Ptr = hist[ori].begin(); 00235 Image<float>::const_iterator src2Ptr = hist[ori+halfOriSize].begin(); 00236 00237 Image<double>::iterator normPtr = norm.beginw(); 00238 Image<double>::const_iterator normPtrEnd = norm.end(); 00239 00240 while(normPtr < normPtrEnd) 00241 { 00242 *(normPtr++) += (*src1Ptr + *src2Ptr) * (*src1Ptr + *src2Ptr); 00243 src1Ptr++; 00244 src2Ptr++; 00245 } 00246 } 00247 00248 return norm; 00249 } 00250 00251 ImageSet<float> HOG::getOriHistogram(const Image<float>& mag, const Image<float>& ori, int numOrientations, int numBins) 00252 { 00253 Dims blocksDims = Dims( 00254 (int)round((double)mag.getWidth()/double(numBins)), 00255 (int)round((double)mag.getHeight()/double(numBins))); 00256 00257 ImageSet<float> hist(numOrientations, blocksDims, ZEROS); 00258 00259 Image<float>::const_iterator magPtr = mag.begin(), oriPtr = ori.begin(); 00260 //Set the with an height to a whole bin numbers. 00261 //If needed replicate the data when summing the bins 00262 int w = blocksDims.w()*numBins; 00263 int h = blocksDims.h()*numBins; 00264 int magW = mag.getWidth(); 00265 int magH = mag.getHeight(); 00266 int histWidth = blocksDims.w(); 00267 int histHeight = blocksDims.h(); 00268 00269 for (int y = 1; y < h-1; y ++) 00270 for (int x = 1; x < w-1; x ++) 00271 { 00272 // add to 4 histograms around pixel using linear interpolation 00273 double xp = ((double)x+0.5)/(double)numBins - 0.5; 00274 double yp = ((double)y+0.5)/(double)numBins - 0.5; 00275 int ixp = (int)floor(xp); 00276 int iyp = (int)floor(yp); 00277 double vx0 = xp-ixp; 00278 double vy0 = yp-iyp; 00279 double vx1 = 1.0-vx0; 00280 double vy1 = 1.0-vy0; 00281 00282 00283 //If we are outside out mag/ori data, then use the last values in it 00284 int magX = std::min(x, magW-2); 00285 int magY = std::min(y, magH-2); 00286 double mag = magPtr[magY*magW + magX]; 00287 int ori = int(oriPtr[magY*magW + magX]); 00288 00289 Image<float>::iterator histPtr = hist[ori].beginw(); 00290 00291 if (ixp >= 0 && iyp >= 0) 00292 histPtr[iyp*histWidth + ixp] += vx1*vy1*mag; 00293 00294 if (ixp+1 < histWidth && iyp >= 0) 00295 histPtr[iyp*histWidth + ixp+1] += vx0*vy1*mag; 00296 00297 if (ixp >= 0 && iyp+1 < histHeight) 00298 histPtr[(iyp+1)*histWidth + ixp] += vx1*vy0*mag; 00299 00300 if (ixp+1 < histWidth && iyp+1 < histHeight) 00301 histPtr[(iyp+1)*histWidth + ixp+1] += vx0*vy0*mag; 00302 } 00303 00304 return hist; 00305 } 00306 00307 void HOG::getMaxGradient(const Image<PixRGB<byte> >& img, 00308 Image<float>& mag, Image<float>& ori, 00309 int numOrientations) 00310 { 00311 if (numOrientations != 0 && 00312 numOrientations > 18) 00313 LFATAL("Can only support up to 18 orientations for now."); 00314 00315 mag.resize(img.getDims()); ori.resize(img.getDims()); 00316 00317 Image<PixRGB<byte> >::const_iterator src = img.begin(); 00318 Image<float>::iterator mPtr = mag.beginw(), oPtr = ori.beginw(); 00319 const int w = mag.getWidth(), h = mag.getHeight(); 00320 00321 float zero = 0; 00322 00323 // first row is all zeros: 00324 for (int i = 0; i < w; i ++) { *mPtr ++ = zero; *oPtr ++ = zero; } 00325 src += w; 00326 00327 // loop over inner rows: 00328 for (int j = 1; j < h-1; j ++) 00329 { 00330 // leftmost pixel is zero: 00331 *mPtr ++ = zero; *oPtr ++ = zero; ++ src; 00332 00333 // loop over inner columns: 00334 for (int i = 1; i < w-1; i ++) 00335 { 00336 PixRGB<int> valx = src[1] - src[-1]; 00337 PixRGB<int> valy = src[w] - src[-w]; 00338 00339 //Mag 00340 double mag1 = (valx.red()*valx.red()) + (valy.red()*valy.red()); 00341 double mag2 = (valx.green()*valx.green()) + (valy.green()*valy.green()); 00342 double mag3 = (valx.blue()*valx.blue()) + (valy.blue()*valy.blue()); 00343 00344 double mag = mag1; 00345 double dx = valx.red(); 00346 double dy = valy.red(); 00347 00348 //Get the channel with the strongest gradient 00349 if (mag2 > mag) 00350 { 00351 dx = valx.green(); 00352 dy = valy.green(); 00353 mag = mag2; 00354 } 00355 if (mag3 > mag) 00356 { 00357 dx = valx.blue(); 00358 dy = valy.blue(); 00359 mag = mag3; 00360 } 00361 00362 *mPtr++ = sqrt(mag); 00363 if (numOrientations > 0) 00364 { 00365 //Snap to num orientations 00366 double bestDot = 0; 00367 int bestOri = 0; 00368 for (int ori = 0; ori < numOrientations/2; ori++) { 00369 double dot = itsUU[ori]*dx + itsVV[ori]*dy; 00370 if (dot > bestDot) { 00371 bestDot = dot; 00372 bestOri = ori; 00373 } else if (-dot > bestDot) { 00374 bestDot = -dot; 00375 bestOri = ori+(numOrientations/2); 00376 } 00377 } 00378 *oPtr++ = bestOri; 00379 00380 } else { 00381 *oPtr++ = atan2(dy, dx); 00382 } 00383 ++ src; 00384 } 00385 00386 // rightmost pixel is zero: 00387 *mPtr ++ = zero; *oPtr ++ = zero; ++ src; 00388 } 00389 00390 // last row is all zeros: 00391 for (int i = 0; i < w; i ++) { *mPtr ++ = zero; *oPtr ++ = zero; } 00392 } 00393 00394 // ###################################################################### 00395 /* So things look consistent in everyone's emacs... */ 00396 /* Local Variables: */ 00397 /* indent-tabs-mode: nil */ 00398 /* End: */