00001 /*!@file HMAX/HmaxFL.C Riesenhuber & Poggio's HMAX model for object recognition */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/HMAX/HmaxFL.C $ 00035 // $Id: HmaxFL.C 14376 2011-01-11 02:44:34Z pez $ 00036 // 00037 00038 #include <iostream> 00039 #include "HMAX/HmaxFL.H" 00040 #include "HMAX/Hmax.H" 00041 #include "Image/FilterOps.H" // for convolve() etc. 00042 #include "Image/Image.H" 00043 #include "Image/Kernels.H" // for dogFilter() 00044 #include "Image/MathOps.H" 00045 #include "Image/Normalize.H" 00046 #include "Image/CutPaste.H" 00047 #include "Raster/Raster.H" 00048 #include "Raster/RasterFileFormat.H" 00049 #include "Util/MathFunctions.H" 00050 #include "Util/Types.H" 00051 #include "Util/log.H" 00052 #include "Util/safecopy.H" 00053 00054 #include <cmath> 00055 #include <cstdio> 00056 #include <cstdlib> 00057 #include <cfloat> 00058 #include <limits> 00059 #include <stdexcept> 00060 00061 // ###################################################################### 00062 HmaxFL::HmaxFL() 00063 { initialized = false; } 00064 00065 // ###################################################################### 00066 HmaxFL::HmaxFL(const int nori, const std::vector<int>& spacess, 00067 const std::vector<int>& scaless, const int c1spaceol, 00068 const bool angleflag, const float s2t, const float s2s, 00069 const float gamma, const float divstart, const float divstep, 00070 const int fsmin, const int fsstep) 00071 { 00072 initialized = false; 00073 init(nori, spacess, scaless, c1spaceol, angleflag, s2t, s2s, gamma, divstart, divstep, fsmin, fsstep); 00074 initialized = true; 00075 } 00076 00077 // ###################################################################### 00078 void HmaxFL::init(const int nori, const std::vector<int>& spacess, 00079 const std::vector<int>& scaless, const int c1spaceol, 00080 const bool angleflag, const float s2t, const float s2s, 00081 const float gamma, const float divstart, const float divstep, 00082 const int fsmin, const int fsstep) 00083 { 00084 Hmax::init(nori,spacess,scaless,c1spaceol,angleflag,s2t,s2s); 00085 int curNSWB; 00086 c1Patches = 0; 00087 nswb = 0; 00088 // Determine the number of scales within band 00089 for (int sb = 0; sb < nsb; sb ++) { 00090 curNSWB = 0; 00091 for(int s = scaleSS[sb]; s < scaleSS[sb + 1]; s++) { 00092 curNSWB+=1; 00093 } 00094 nswb = std::max(nswb,curNSWB); 00095 } 00096 initFilters(gamma,divstart,divstep,fsmin,fsstep); 00097 } 00098 00099 void HmaxFL::initFilters(const float gamma, const float divstart, const float divstep, const int fsmin, const int fsstep) 00100 { 00101 // create the filters: 00102 typedef Image<float>* FloatImagePtr; 00103 filter = new FloatImagePtr[ns]; 00104 for(int s = 0; s < ns; s++) 00105 { 00106 filter[s] = new Image<float>[no]; 00107 for(int o = 0; o < no; o ++) 00108 { 00109 // create DoG filter: 00110 filter[s][o] = dogFilterHmax<float>( (float)o * 180.0F / (float)no, gamma, 00111 fsmin + fsstep * s, divstart + divstep * s); 00112 // normalize to zero mean: 00113 filter[s][o] -= mean(filter[s][o]); 00114 00115 // normalize to sqrt of sum-of-squares: 00116 filter[s][o] /= sqrt(sum(squared(filter[s][o]))); 00117 } 00118 } 00119 } 00120 00121 // ###################################################################### 00122 void HmaxFL::freeMem() 00123 { 00124 if (initialized) 00125 { 00126 initialized = false; 00127 freeC1Patches(); 00128 for(int s = 0; s < ns; s++) delete [] filter[s]; 00129 delete [] filter; 00130 } 00131 } 00132 00133 // ###################################################################### 00134 HmaxFL::~HmaxFL() 00135 { freeMem(); } 00136 00137 00138 00139 void HmaxFL::freeC1Patches() 00140 { 00141 if(c1Patches != 0){ 00142 for(unsigned int i=0;i<c1PatchSizes.size();i++){ 00143 delete [] c1Patches[i]; 00144 } 00145 delete [] c1Patches; 00146 c1Patches = 0; 00147 } 00148 } 00149 00150 00151 void HmaxFL::setC1Patches(Image<float>***&patches,std::vector<int> patchSizes,int nPatchesPerSize) 00152 { 00153 c1Patches = patches; 00154 c1PatchSizes = patchSizes; 00155 nC1PatchesPerSize = nPatchesPerSize; 00156 } 00157 00158 void HmaxFL::writeOutC1Patch(std::string dirName, Image<float>& patch, int i, int j,int k) 00159 { 00160 std::string fname; 00161 std::stringstream fnamestr; 00162 fnamestr.str(""); 00163 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00164 fname = fnamestr.str(); 00165 Raster::WriteFloat(patch,FLOAT_NORM_0_255,fname,RASFMT_PNM); 00166 } 00167 00168 void HmaxFL::writeOutC1Patches(std::string dirName) 00169 { 00170 if(c1Patches == 0){ 00171 throw std::runtime_error("Cannot write out patches, patches undefined"); 00172 } 00173 std::string fname; 00174 std::stringstream fnamestr; 00175 for(unsigned int i=0;i<c1PatchSizes.size();i++) { 00176 for(int j=0;j<nC1PatchesPerSize;j++) { 00177 for(int k=0;k<no;k++) { 00178 writeOutC1Patch(dirName,c1Patches[i][j][k],i,j,k); 00179 } 00180 } 00181 } 00182 } 00183 00184 void HmaxFL::readInC1Patches(std::string dirName) 00185 { 00186 std::string fname; 00187 std::stringstream fnamestr; 00188 00189 int i,j,k; 00190 00191 // mi - patchSizes 00192 // mj - number of patches per size 00193 // mk - number of orientations 00194 int mi,mj,mk; 00195 Image<byte> input; 00196 00197 i=j=k=0; 00198 mi=mj=mk=0; 00199 // Size the number of patches first (very inefficient) 00200 while(1){ 00201 while(1){ 00202 while(1){ 00203 fnamestr.str(""); 00204 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00205 fname = fnamestr.str(); 00206 if(Raster::fileExists(fname,RASFMT_PNM)) 00207 k++; 00208 else 00209 break; 00210 } 00211 if(mk==0 && k>0){ 00212 mk=k; 00213 } 00214 if(k == 0){ 00215 j--; // This j was too large 00216 break; // Either done, or j is also done 00217 } 00218 else if(mk != k){ 00219 throw std::runtime_error("Unexpected jagged array of patches in 3rd dimension"); 00220 } 00221 else { 00222 k=0; 00223 j++; 00224 } 00225 } 00226 if(mj==0 && j>0){ 00227 mj=j; 00228 } 00229 if(j<=0){ 00230 i--; // This i was too large 00231 break; // Should be done 00232 } 00233 else if(mj != j){ 00234 throw std::runtime_error("Unexpected jagged array of patches in 2nd dimension"); 00235 } 00236 else{ 00237 j=0; 00238 i++; 00239 } 00240 } 00241 if(mi==0 && i>0){ 00242 mi=i; 00243 } 00244 // The loop by design will end up with -1 -1 0 if the list is empty, this corrects 00245 mi+=1; 00246 mj+=1; 00247 00248 if(mi==0 || mj==0 || mk==0){ 00249 throw std::runtime_error("No patches found"); 00250 } 00251 00252 std::vector<int> patchSizes(mi); 00253 00254 // Allocate the space for the patch 3D array 00255 Image<float> ***patches = new Image<float>**[mi]; 00256 for(i=0;i<mi;i++){ 00257 patches[i] = new Image<float>*[mj]; 00258 for(j=0;j<mj;j++) { 00259 patches[i][j] = new Image<float>[mk]; 00260 } 00261 } 00262 00263 std::cout << mi << " " << mj << " " << mk << std::endl; 00264 00265 // Load the patches 00266 int w,h; 00267 for(i=0;i<mi;i++){ 00268 for(j=0;j<mj;j++){ 00269 for(k=0;k<mk;k++){ 00270 fnamestr.str(""); 00271 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00272 fname = fnamestr.str(); 00273 input=Raster::ReadGray(fname,RASFMT_PNM); 00274 patches[i][j][k]=input; 00275 w=patches[i][j][k].getWidth(); 00276 h=patches[i][j][k].getHeight(); 00277 if(w != h){ 00278 // I'm leaking, patches not freed! 00279 throw std::runtime_error("Patches are expected to be square"); 00280 } 00281 if(j==0 && k==0){ 00282 patchSizes[i] = w; 00283 } 00284 else if(patchSizes[i] != w){ 00285 // I'm leaking, patches not freed! 00286 throw std::runtime_error("Patches in same band should all be the same size"); 00287 } 00288 } 00289 } 00290 } 00291 // Free the existing patches class member to prevent a leak 00292 freeC1Patches(); 00293 // Now set the new patches class member 00294 setC1Patches(patches,patchSizes,mj); 00295 } 00296 00297 Image<float>***& HmaxFL::getC1Patches() 00298 { 00299 return c1Patches; 00300 } 00301 00302 std::vector<int> HmaxFL::getC1PatchSizes() 00303 { 00304 return c1PatchSizes; 00305 } 00306 00307 int HmaxFL::getC1PatchesPerSize() 00308 { 00309 return nC1PatchesPerSize; 00310 } 00311 00312 void HmaxFL::extractRandC1Patch(std::string dirname, Image<float> & image, int index, std::vector<int> patchSizes) 00313 { 00314 Image<float> **c1Res; 00315 initC1(c1Res); 00316 getC1(image,c1Res); 00317 int sb = 0; // Only one scale band 00318 00319 int bsize1 = c1Res[sb][0].getWidth(); 00320 int bsize2 = c1Res[sb][0].getHeight(); 00321 printf("C1 Activation size is %d %d ScaleSS %d ScaleOL %d\n",bsize1,bsize2,scaleSS[0],c1SpaceOL); 00322 Image<float> patch; 00323 for(unsigned int i=0;i<patchSizes.size();i++) 00324 { 00325 int xy1 = int(floor((rand()-1.0F)/RAND_MAX*(bsize1-patchSizes[i]))); 00326 int xy2 = int(floor((rand()-1.0F)/RAND_MAX*(bsize2-patchSizes[i]))); 00327 Rectangle r = Rectangle::tlbrI(xy2,xy1,xy2+patchSizes[i]-1,xy1+patchSizes[i]-1); 00328 for(int k=0;k<no;k++) { 00329 patch = crop(c1Res[sb][k],r); 00330 patch *= 255*10; 00331 writeOutC1Patch(dirname,patch,i,index,k); 00332 } 00333 } 00334 clearC1(c1Res); 00335 } 00336 00337 void HmaxFL::extractRandC1Patches(std::string dirname, Image<float> *& posTrainingImages, int numPosTrainImages, std::vector<int> patchSizes, int nPatchesPerSize) 00338 { 00339 std::srand(time(0)); 00340 for(int i=0;i<nPatchesPerSize;i++){ 00341 // Randomly select an image from the list 00342 unsigned int imInd = static_cast<unsigned int>(floor((rand()-1.0F)/RAND_MAX*numPosTrainImages)); 00343 extractRandC1Patch(dirname,posTrainingImages[imInd],i,patchSizes); 00344 } 00345 readInC1Patches(dirname); 00346 } 00347 00348 //computes the euclidean distance between each patch and all crops of images of 00349 //similar size. 00350 void HmaxFL::windowedPatchDistance(Image <float>* &images, int nimages, Image <float> *& patches, int npatches, Image<float>& D) 00351 { 00352 00353 // sum_over_p(W(p)-I(p))^2 is factored as 00354 // sum_over_p(W(p)^2) + sum_over_p(I(p)^2) - 2*(W(p)*I(p)); 00355 00356 // Im and Patch must have the same number of channels 00357 if(nimages != npatches) { 00358 // Error 00359 throw std::invalid_argument("Number of layers must be equal between patches and images"); 00360 } 00361 00362 Image<float> imSq(images[0].getWidth(),images[0].getHeight(),ZEROS); 00363 Image<float> imSqFiltered; 00364 Image<float> pi(images[0].getWidth(),images[0].getHeight(),ZEROS); 00365 float sumPSq=0; 00366 00367 int s1 = patches[0].getWidth(); 00368 int s2 = patches[0].getHeight(); 00369 00370 for(int i=0;i<nimages;i++) { 00371 00372 //s = size(Patch); 00373 //Psqr = sum(sum(sum(Patch.^2))); 00374 sumPSq += sum(patches[i]*patches[i]); 00375 //Imsq = Im.^2; 00376 //Imsq = sum(Imsq,3); 00377 imSq += images[i]*images[i]; 00378 } 00379 00380 // tt, ll, bb, rr 00381 Rectangle sumSupport = Rectangle::tlbrI(int(ceil(s2/2)-1),int(ceil(s1/2)-1),int(floor(s2/2)),int(floor(s1/2))); 00382 //Imsq = sumfilter(Imsq,sum_support); 00383 sumFilter(imSq,sumSupport,imSqFiltered); 00384 for(int i=0;i<nimages;i++) { 00385 //PI = PI + conv2(Im(:,:,i),Patch(:,:,i), 'same'); 00386 pi += convolve(images[i],patches[i],CONV_BOUNDARY_ZERO); 00387 } 00388 00389 //D = Imsq - 2 * PI + Psqr + 10^-10; 00390 D = imSqFiltered - pi*2 + sumPSq + 10E-10F; 00391 00392 } 00393 00394 00395 // ###################################################################### 00396 void HmaxFL::getC2(const Image<float>& input, float**& c2Res) 00397 { 00398 if(c1Patches == 0){ 00399 throw std::runtime_error("C1Patches must be initialized before determining c2 layer response"); 00400 } 00401 00402 // allocate buffers for intermediary results: 00403 00404 Image<float> **c1Res; 00405 initC1(c1Res); 00406 00407 // ****************************** 00408 // *** Compute S1/C1 output: 00409 // ****************************** 00410 getC1(input,c1Res); 00411 00412 // ****************************** 00413 // *** Compute S2/C2 output: 00414 // ****************************** 00415 // New way, use c1Patches and windowedPatchDistance 00416 00417 Image<float> s2Res; 00418 00419 for(unsigned int ps = 0; ps < c1PatchSizes.size(); ps++) { 00420 for(int pi = 0; pi <nC1PatchesPerSize; pi++) { 00421 c2Res[ps][pi] = FLT_MAX; 00422 for(int sb = 0; sb < nsb; sb++){ 00423 windowedPatchDistance(c1Res[sb],no,c1Patches[ps][pi],no,s2Res); 00424 //printCorners("s2Res",s2Res,ps==0&&pi==0&&sb==0); 00425 float mi, ma; getMinMax(s2Res,mi,ma); 00426 c2Res[ps][pi] = std::min(c2Res[ps][pi],mi); 00427 } 00428 } 00429 } 00430 00431 clearC1(c1Res); 00432 } 00433 00434 00435 00436 // ###################################################################### 00437 /* So things look consistent in everyone's emacs... */ 00438 /* Local Variables: */ 00439 /* indent-tabs-mode: nil */ 00440 /* End: */