00001 /*!@file CUDA/CudaHmaxFL.C Riesenhuber & Poggio's HMAX model for object recognition */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaHmaxFL.C $ 00035 // $Id: CudaHmaxFL.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 #include "CUDA/CudaHmaxFL.H" 00039 #include "CUDA/CudaHmax.H" 00040 #include "CUDA/CudaFilterOps.H" 00041 #include "CUDA/CudaImage.H" 00042 //#include "CUDA/CudaKernels.H" // for cudaDogFilterHmax() 00043 #include "Image/Kernels.H" // for dogFilterHmax() 00044 #include "CUDA/CudaMathOps.H" 00045 #include "CUDA/CudaConvolutions.H" // for cudaConvolve() etc. 00046 #include "Image/Normalize.H" 00047 #include "CUDA/CudaCutPaste.H" 00048 #include "Raster/Raster.H" 00049 #include "Raster/RasterFileFormat.H" 00050 #include "Util/MathFunctions.H" 00051 #include "Util/Timer.H" 00052 #include "Util/Types.H" 00053 #include "Util/log.H" 00054 #include "Util/safecopy.H" 00055 00056 #include <cmath> 00057 #include <cstdio> 00058 #include <cstdlib> 00059 #include <cfloat> 00060 #include <limits> 00061 #include <sstream> 00062 #include <stdexcept> 00063 00064 // ###################################################################### 00065 CudaHmaxFL::CudaHmaxFL() 00066 { initialized = false; } 00067 00068 // ###################################################################### 00069 CudaHmaxFL::CudaHmaxFL(MemoryPolicy mp, int dev, const int nori, const std::vector<int>& spacess, 00070 const std::vector<int>& scaless, const int c1spaceol, 00071 const bool angleflag, const float s2t, const float s2s, 00072 const float gamma, const float divstart, const float divstep, 00073 const int fsmin, const int fsstep) 00074 { 00075 initialized = false; 00076 init(mp,dev,nori, spacess, scaless, c1spaceol, angleflag, s2t, s2s, gamma, divstart, divstep, fsmin, fsstep); 00077 } 00078 00079 // ###################################################################### 00080 void CudaHmaxFL::init(MemoryPolicy mp, int dev, const int nori, const std::vector<int>& spacess, 00081 const std::vector<int>& scaless, const int c1spaceol, 00082 const bool angleflag, const float s2t, const float s2s, 00083 const float gamma, const float divstart, const float divstep, 00084 const int fsmin, const int fsstep) 00085 { 00086 CudaHmax::init(mp,dev,nori,spacess,scaless,c1spaceol,angleflag,s2t,s2s); 00087 int curNSWB; 00088 c1Patches = 0; 00089 nswb = 0; 00090 // Determine the number of scales within band 00091 for (int sb = 0; sb < nsb; sb ++) { 00092 curNSWB = 0; 00093 for(int s = scaleSS[sb]; s < scaleSS[sb + 1]; s++) { 00094 curNSWB+=1; 00095 } 00096 nswb = std::max(nswb,curNSWB); 00097 } 00098 initFilters(gamma,divstart,divstep,fsmin,fsstep); 00099 } 00100 00101 void CudaHmaxFL::initFilters(const float gamma, const float divstart, const float divstep, const int fsmin, const int fsstep) 00102 { 00103 // create the filters: 00104 typedef CudaImage<float>* FloatImagePtr; 00105 filter = new FloatImagePtr[ns]; 00106 for(int s = 0; s < ns; s++) 00107 { 00108 filter[s] = new CudaImage<float>[no]; 00109 for(int o = 0; o < no; o ++) 00110 { 00111 // create DoG filter: 00112 Image<float> tmp = dogFilterHmax<float>((float)o * 180.0F / (float) no, gamma, 00113 fsmin + fsstep * s, divstart + divstep * s); 00114 filter[s][o] = CudaImage<float>(tmp,itsMp,itsDev); 00115 // filter[s][o] = cudaDogFilterHmax(itsMp,itsDev, (float)o * 180.0 / (float)no, gamma, 00116 // fsmin + fsstep * s, divstart + divstep * s); 00117 // normalize to zero mean: 00118 filter[s][o] -= cudaGetAvg(filter[s][o]); 00119 00120 // normalize to sqrt of sum-of-squares: 00121 filter[s][o] /= cudaSqrt(cudaGetSum(cudaSquared(filter[s][o]))); 00122 } 00123 } 00124 } 00125 00126 // ###################################################################### 00127 void CudaHmaxFL::freeMem() 00128 { 00129 if (initialized) 00130 { 00131 freeC1Patches(); 00132 CudaHmax::freeMem(); 00133 initialized = false; 00134 } 00135 } 00136 00137 // ###################################################################### 00138 CudaHmaxFL::~CudaHmaxFL() 00139 { freeMem(); } 00140 00141 00142 00143 void CudaHmaxFL::freeC1Patches() 00144 { 00145 if(c1Patches != 0){ 00146 for(unsigned int i=0;i<c1PatchSizes.size();i++){ 00147 delete [] c1Patches[i]; 00148 delete [] sumPSq[i]; 00149 } 00150 delete [] c1Patches; 00151 delete [] sumPSq; 00152 c1Patches = 0; 00153 } 00154 } 00155 00156 00157 void CudaHmaxFL::setC1Patches(CudaImage<float>***&patches,std::vector<int> patchSizes,int nPatchesPerSize) 00158 { 00159 c1Patches = patches; 00160 c1PatchSizes = patchSizes; 00161 nC1PatchesPerSize = nPatchesPerSize; 00162 sumPSq = new float*[c1PatchSizes.size()]; 00163 CudaImage<float> cSum(1,1,NO_INIT,itsMp,itsDev); 00164 for(unsigned int i=0;i<c1PatchSizes.size();i++) 00165 { 00166 sumPSq[i] = new float[nC1PatchesPerSize]; 00167 for(int j=0;j<nC1PatchesPerSize;j++) { 00168 cSum.clear(); 00169 for(int k=0;k<no;k++) { 00170 cSum += cudaGetSum(cudaSquared(patches[i][j][k])); 00171 } 00172 sumPSq[i][j] = cudaGetScalar(cSum); 00173 } 00174 } 00175 } 00176 00177 void CudaHmaxFL::writeOutC1Patches(std::string dirName) 00178 { 00179 if(c1Patches == 0){ 00180 throw std::runtime_error("Cannot write out patches, patches undefined"); 00181 } 00182 std::string fname; 00183 std::stringstream fnamestr; 00184 for(unsigned int i=0;i<c1PatchSizes.size();i++) { 00185 for(int j=0;j<nC1PatchesPerSize;j++) { 00186 for(int k=0;k<no;k++) { 00187 fnamestr.str(""); 00188 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00189 fname = fnamestr.str(); 00190 std::cout << fname << std::endl; 00191 Raster::WriteFloat(c1Patches[i][j][k].exportToImage(),FLOAT_NORM_0_255,fname,RASFMT_PNM); 00192 } 00193 } 00194 } 00195 } 00196 00197 void CudaHmaxFL::readInC1Patches(std::string dirName) 00198 { 00199 std::string fname; 00200 std::stringstream fnamestr; 00201 00202 int i,j,k; 00203 00204 // mi - patchSizes 00205 // mj - number of patches per size 00206 // mk - number of orientations 00207 int mi,mj,mk; 00208 Image<byte> input; 00209 00210 i=j=k=0; 00211 mi=mj=mk=0; 00212 // Size the number of patches first (very inefficient) 00213 while(1){ 00214 while(1){ 00215 while(1){ 00216 fnamestr.str(""); 00217 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00218 fname = fnamestr.str(); 00219 if(Raster::fileExists(fname,RASFMT_PNM)) 00220 k++; 00221 else 00222 break; 00223 } 00224 if(mk==0 && k>0){ 00225 mk=k; 00226 } 00227 if(k == 0){ 00228 j--; // This j was too large 00229 break; // Either done, or j is also done 00230 } 00231 else if(mk != k){ 00232 throw std::runtime_error("Unexpected jagged array of patches in 3rd dimension"); 00233 } 00234 else { 00235 k=0; 00236 j++; 00237 } 00238 } 00239 if(mj==0 && j>0){ 00240 mj=j; 00241 } 00242 if(j<=0){ 00243 i--; // This i was too large 00244 break; // Should be done 00245 } 00246 else if(mj != j){ 00247 throw std::runtime_error("Unexpected jagged array of patches in 2nd dimension"); 00248 } 00249 else{ 00250 j=0; 00251 i++; 00252 } 00253 } 00254 if(mi==0 && i>0){ 00255 mi=i; 00256 } 00257 // The loop by design will end up with -1 -1 0 if the list is empty, this corrects 00258 mi+=1; 00259 mj+=1; 00260 printf("Patches of %d,%d,%d Found\n",mi,mj,mk); 00261 if(mi==0 || mj==0 || mk==0){ 00262 throw std::runtime_error("No patches found"); 00263 } 00264 00265 std::vector<int> patchSizes(mi); 00266 00267 // Allocate the space for the patch 3D array 00268 CudaImage<float> ***patches = new CudaImage<float>**[mi]; 00269 for(i=0;i<mi;i++){ 00270 patches[i] = new CudaImage<float>*[mj]; 00271 for(j=0;j<mj;j++) { 00272 patches[i][j] = new CudaImage<float>[mk]; 00273 } 00274 } 00275 00276 std::cout << mi << " " << mj << " " << mk << std::endl; 00277 00278 // Load the patches 00279 int w,h; 00280 for(i=0;i<mi;i++){ 00281 for(j=0;j<mj;j++){ 00282 for(k=0;k<mk;k++){ 00283 fnamestr.str(""); 00284 fnamestr << dirName << C1_PATCH_FILE_NAME << "." << i << "." << j << "." << k; 00285 fname = fnamestr.str(); 00286 input=Raster::ReadGray(fname,RASFMT_PNM); 00287 patches[i][j][k]=CudaImage<float>(Image<float>(input),itsMp,itsDev); 00288 w=patches[i][j][k].getWidth(); 00289 h=patches[i][j][k].getHeight(); 00290 if(w != h){ 00291 // I'm leaking, patches not freed! 00292 throw std::runtime_error("Patches are expected to be square"); 00293 } 00294 if(j==0 && k==0){ 00295 patchSizes[i] = w; 00296 } 00297 else if(patchSizes[i] != w){ 00298 // I'm leaking, patches not freed! 00299 throw std::runtime_error("Patches in same band should all be the same size"); 00300 } 00301 } 00302 } 00303 } 00304 // Free the existing patches class member to prevent a leak 00305 freeC1Patches(); 00306 // Now set the new patches class member 00307 setC1Patches(patches,patchSizes,mj); 00308 } 00309 00310 CudaImage<float>***& CudaHmaxFL::getC1Patches() 00311 { 00312 return c1Patches; 00313 } 00314 00315 std::vector<int> CudaHmaxFL::getC1PatchSizes() 00316 { 00317 return c1PatchSizes; 00318 } 00319 00320 int CudaHmaxFL::getC1PatchesPerSize() 00321 { 00322 return nC1PatchesPerSize; 00323 } 00324 00325 void CudaHmaxFL::extractRandC1Patches(Image<float> *& posTrainingImages, int numPosTrainImages, std::vector<int> patchSizes, int numPatchesPerSize, int no) 00326 { 00327 // Create a temp CudaHmaxFL object to extract C1Patches 00328 std::vector<int> c1ScaleSS(2); 00329 c1ScaleSS[0] = 1; c1ScaleSS[1] = 3; 00330 std::vector<int> c1SpaceSS(2); 00331 c1SpaceSS[0] = 10; c1SpaceSS[1] = 11; 00332 // desired frame sizes [11 and 13] 00333 CudaHmaxFL hmax(itsMp,itsDev,no,c1SpaceSS,c1ScaleSS,2,true,1.0F,1.0F,0.3F,4.05F,-0.05F,11,2); 00334 00335 00336 CudaImage<float> ***patches = new CudaImage<float>**[patchSizes.size()]; 00337 for(unsigned int i=0;i<patchSizes.size();i++){ 00338 patches[i] = new CudaImage<float>*[numPatchesPerSize]; 00339 for(int j=0;j<numPatchesPerSize;j++) { 00340 patches[i][j] = new CudaImage<float>[no]; 00341 for(int k=0;k<no;k++) { 00342 // Set patches to be all zeros 00343 patches[i][j][k].resize(patchSizes[i],patchSizes[i],ZEROS); 00344 } 00345 } 00346 } 00347 00348 CudaImage<float> **c1Res; 00349 CudaImage<float> stim; 00350 std::srand(time(0)); 00351 int sb = 0; // Only one scale band 00352 00353 for(int i=0;i<numPatchesPerSize;i++){ 00354 // Randomly select an image from the list 00355 unsigned int imInd = static_cast<unsigned int>(floor((rand()-1.0F)/RAND_MAX*numPosTrainImages)); 00356 stim = CudaImage<float>(posTrainingImages[imInd],itsMp,itsDev); 00357 hmax.initC1(c1Res); 00358 hmax.getC1(stim,c1Res); 00359 00360 int bsize1 = c1Res[sb][0].getWidth(); 00361 int bsize2 = c1Res[sb][0].getHeight(); 00362 hmax.printCorners("input",stim,i<5); 00363 for(unsigned int j=0;j<patchSizes.size();j++) { 00364 int xy1 = int(floor((rand()-1.0F)/RAND_MAX*(bsize1-patchSizes[j]))); 00365 int xy2 = int(floor((rand()-1.0F)/RAND_MAX*(bsize2-patchSizes[j]))); 00366 Rectangle r = Rectangle::tlbrI(xy2,xy1,xy2+patchSizes[j]-1,xy1+patchSizes[j]-1); 00367 for(int k=0;k<no;k++) { 00368 patches[j][i][k] = cudaCrop(c1Res[sb][k],r); 00369 patches[j][i][k] *= 255*10; 00370 } 00371 } 00372 hmax.printCorners("patch[0][i][0]",patches[0][i][0],i<5); 00373 hmax.clearC1(c1Res); 00374 00375 } 00376 00377 setC1Patches(patches,patchSizes,numPatchesPerSize); 00378 // nori, spacess,scaless, c1spaceol = 2, angleflag = true, s2t = 1.0F, 00379 // s2s = 1.0F, stdmin = 1.75F,stdstep = 0.5F, fsmin = 3, fsstep = 1) 00380 00381 //div = [4:-.05:3.2]; 00382 //Div = div(3:4); 00383 00384 } 00385 00386 //computes the euclidean distance between each patch and all crops of images of 00387 //similar size. 00388 void CudaHmaxFL::windowedPatchDistance(CudaImage <float>* &images, int nimages, CudaImage <float> *& patches, int npatches, CudaImage<float>& D, float sumSquaredPatch) 00389 { 00390 00391 // sum_over_p(W(p)-I(p))^2 is factored as 00392 // sum_over_p(W(p)^2) + sum_over_p(I(p)^2) - 2*(W(p)*I(p)); 00393 00394 // Im and Patch must have the same number of channels 00395 if(nimages != npatches) { 00396 // Error 00397 throw std::invalid_argument("Number of layers must be equal between patches and images"); 00398 } 00399 00400 CudaImage<float> imSq(images[0].getWidth(),images[0].getHeight(),ZEROS,itsMp,itsDev); 00401 CudaImage<float> imSqFiltered; 00402 CudaImage<float> pi(images[0].getWidth(),images[0].getHeight(),ZEROS,itsMp,itsDev); 00403 //CudaImage<float> sumPSquared= CudaImage<float>(1,1,ZEROS,itsMp,itsDev); 00404 00405 int s1 = patches[0].getWidth(); 00406 int s2 = patches[0].getHeight(); 00407 00408 for(int i=0;i<nimages;i++) { 00409 00410 //s = size(Patch); 00411 //Psqr = sum(sum(sum(Patch.^2))); 00412 //sumPSquared += cudaGetSum(cudaSquared(patches[i])); 00413 //Imsq = Im.^2; 00414 //Imsq = sum(Imsq,3); 00415 imSq += cudaSquared(images[i]); 00416 } 00417 00418 // tt, ll, bb, rr 00419 Rectangle sumSupport = Rectangle::tlbrI(int(ceil(s2/2)-1),int(ceil(s1/2)-1),int(floor(s2/2)),int(floor(s1/2))); 00420 //Imsq = sumfilter(Imsq,sum_support); 00421 sumFilter(imSq,sumSupport,imSqFiltered); 00422 for(int i=0;i<nimages;i++) { 00423 //PI = PI + conv2(Im(:,:,i),Patch(:,:,i), 'same'); 00424 pi += cudaConvolve(images[i],patches[i],CONV_BOUNDARY_ZERO); 00425 } 00426 00427 //D = Imsq - 2 * PI + Psqr + 10^-10; 00428 //D = imSqFiltered - pi*2 + sumPSquared + 10E-10F; 00429 D = imSqFiltered - (pi*2 + sumSquaredPatch + 10E-10F); 00430 00431 } 00432 00433 00434 // ###################################################################### 00435 void CudaHmaxFL::getC2(const CudaImage<float>& input, float**& c2Res) 00436 { 00437 if(c1Patches == 0){ 00438 throw std::runtime_error("C1Patches must be initialized before determining c2 layer response"); 00439 } 00440 00441 // allocate buffers for intermediary results: 00442 00443 CudaImage<float> **c1Res; 00444 initC1(c1Res); 00445 Timer tim; 00446 tim.reset(); 00447 // ****************************** 00448 // *** Compute S1/C1 output: 00449 // ****************************** 00450 getC1(input,c1Res); 00451 00452 // ****************************** 00453 // *** Compute S2/C2 output: 00454 // ****************************** 00455 // New way, use c1Patches and windowedPatchDistance 00456 00457 CudaImage<float> s2Res; 00458 00459 00460 for(unsigned int ps = 0; ps < c1PatchSizes.size(); ps++) { 00461 for(int pi = 0; pi <nC1PatchesPerSize; pi++) { 00462 c2Res[ps][pi] = FLT_MAX; 00463 for(int sb = 0; sb < nsb; sb++){ 00464 windowedPatchDistance(c1Res[sb],no,c1Patches[ps][pi],no,s2Res,sumPSq[ps][pi]); 00465 //printCorners("s2Res",s2Res,ps==0&&pi==0&&sb==0); 00466 CudaImage<float> cmi; 00467 cudaGetMin(s2Res,cmi); 00468 float mi = cudaGetScalar(cmi); 00469 c2Res[ps][pi] = std::min(c2Res[ps][pi],mi); 00470 } 00471 } 00472 } 00473 printf("S2/C2 Layers %f secs\n",tim.getSecs()); 00474 00475 clearC1(c1Res); 00476 } 00477 00478 00479 00480 // ###################################################################### 00481 /* So things look consistent in everyone's emacs... */ 00482 /* Local Variables: */ 00483 /* indent-tabs-mode: nil */ 00484 /* End: */