00001 /*!@file HMAX/test-hmax.C This is a test of the C++ hmax object recognition */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/HMAX/test-hmax.C $ 00035 // $Id: test-hmax.C 9412 2008-03-10 23:10:15Z farhan $ 00036 // 00037 00038 #include "Channels/Jet.H" 00039 #include "GUI/XWindow.H" 00040 #include "Image/DrawOps.H" 00041 #include "Image/Image.H" 00042 #include "Image/ImageSet.H" 00043 #include "Image/Kernels.H" // for dogFilterHmax() 00044 #include "Image/MathOps.H" 00045 #include "Image/ShapeOps.H" 00046 #include "Raster/Raster.H" 00047 #include "Util/log.H" 00048 00049 #include <cmath> 00050 #include <dirent.h> 00051 #include <fstream> 00052 #include <iostream> 00053 #include <string> 00054 #include <vector> 00055 00056 // show debugging info on memory alloc/free 00057 //#define ARRAYDATA_DEBUG_ALLOC 00058 00059 // include class definitions for all classes that define or use 00060 // templates. See MakeObjects for why we do that strange thing! 00061 00062 inline float sqr(float x) { 00063 return (x * x); 00064 } 00065 00066 std::vector<std::string> readDir(std::string inName) { 00067 DIR *dp = opendir(inName.c_str()); 00068 dirent *dirp; 00069 std::vector<std::string> fList; 00070 while ((dirp = readdir(dp)) != NULL ) { 00071 if (dirp->d_name[0] != '.') 00072 fList.push_back(inName + '/' + std::string(dirp->d_name)); 00073 } 00074 std::cout << "# of files ...: " << fList.size() << std::endl; 00075 std::cout << "file list : " << std::endl; 00076 for (unsigned int i=0; i<fList.size(); i++) 00077 std::cout << "\t" << fList[i] << std::endl; 00078 return fList; 00079 } 00080 00081 // ###################################################################### 00082 // ##### Main Program: 00083 // ###################################################################### 00084 //! The main routine. Read images, process, display results 00085 int main(int argc, char **argv) 00086 { 00087 float currStd = 1.75; 00088 float stdStep = 0.5; 00089 float currOri = 0.0; 00090 float oriStep = 45.0; 00091 int numOri = 4; 00092 int numScale = 12; 00093 int numScaleBand = 4; 00094 int minFS = 7; 00095 int maxFS = 29; 00096 int sSFS = 2; 00097 int fieldSize = 0; 00098 int cropBegin, cropEnd; 00099 int sizeOfPR = 4; // size for spatial pooling range in c1 00100 int poolRange[] = {4, 6, 9, 12}; 00101 int scaleSS[] = {1, 3, 6, 9, 13}; 00102 int fSiz[numScale*numScaleBand]; 00103 int c1OL = 2; // define the ovelap of pooling range in c1 00104 int c1R = 0; //define c1's abstraction from s1 : receptive range 00105 int imgHeight = 128; 00106 int imgWidth = 128; 00107 int c1RSize = 0; 00108 int resImg, resX, resY; // index of c1Res[] 00109 int c1Size, c2Size, s2Size; 00110 int c2x, c2y; 00111 float res = 0; 00112 float s2Res = 0; 00113 int angleFlag = 1; 00114 int s2Target = 0; //the mean of the gaussian: center 00115 int target = 0; 00116 int tItr[] = {2, 600, 1200}; 00117 00118 // ########## parse options: 00119 char imname[1024]; imname[0] = '\0'; 00120 strncpy(imname, argv[1], 1023); 00121 target = atoi(argv[2]); 00122 std::string inName = imname; 00123 00124 std::vector<std::string> fileList = readDir(inName); 00125 00126 // ########## file for restore the result 00127 std::ofstream wFile ("weightFile"); 00128 00129 // weighting for IT 00130 float eta = 0.2; 00131 Image<float> wt(16, 16, NO_INIT); 00132 00133 // buffer for intermediate c1 response 00134 Image<float> c1Buff[numScaleBand]; 00135 // c1 result : 4 * 4 =16 features 00136 Image<float> c1Res[numOri*sizeOfPR]; 00137 // c2 result : a 16 * 16 = 256 pixels image 00138 Image<float> c2Res(16, 16, NO_INIT); 00139 Image<byte> im; 00140 Image<float> filters[numOri*numScale]; 00141 Image<float> tmpFilt; 00142 00143 // display weights: 00144 XWindow xww(Dims(256, 256), -1, -1, "C2->IT Weights"); 00145 Image<byte> xwi(256, 256, NO_INIT); 00146 XWindow xwimg(Dims(imgWidth, imgHeight), -1, -1, "Original Image"); 00147 00148 //for(unsigned int tItrInd = 0; tItrInd < 3; tItrInd++) { 00149 for(int itr=0; itr < tItr[0]; itr++) { 00150 for(unsigned int imgInd=0; imgInd<fileList.size(); imgInd++) { 00151 int fInd = 0; 00152 std::cout << "**********img..." << imgInd << std::endl; 00153 std::cout << "target..." << target << std::endl; 00154 00155 //read input Image and convert to float 00156 im = Raster::ReadGray(fileList[imgInd], RASFMT_PNM); 00157 std::cout << "the size of image...im...." << im.getSize() << std::endl; 00158 rescale(im, imgWidth, imgHeight); 00159 00160 xwimg.drawImage(im); 00161 00162 Image<float> fim = im; 00163 //Raster::VisuFloat(fim,FLOAT_NORM_0_255,"float-image.pgm"); 00164 00165 //*************************************************** 00166 //making filters : numOri(4) * numScale(12) = 48 filters 00167 for(int filtSize=minFS; filtSize<=maxFS; filtSize+=sSFS) { 00168 fieldSize = (int)(filtSize * ::sqrt(2) + 1); 00169 fieldSize = fieldSize + 1 + (fieldSize % 2); 00170 cropBegin = (int)((fieldSize-filtSize) / 2) + 1; 00171 cropEnd = cropBegin + filtSize; 00172 currStd += stdStep; 00173 currOri = 0; 00174 for(int oriInd = 0; oriInd < numOri; oriInd ++) { 00175 tmpFilt.resize(fieldSize,fieldSize,true); 00176 filters[fInd].resize(maxFS*maxFS,1,true); 00177 currOri += oriStep; 00178 tmpFilt = dogFilterHmax<float>(currStd,currOri,cropBegin,cropEnd); 00179 00180 fSiz[fInd] = tmpFilt.getHeight(); 00181 int filtElm = 0; 00182 for(int y = 0; y < filtSize; y++) { 00183 for(int x = 0; x < filtSize; x ++) { 00184 filters[fInd].setVal(filtElm,0,(tmpFilt.getVal(x,y))); 00185 filtElm++; 00186 } 00187 } 00188 //Raster::VisuFloat(filters[fInd],FLOAT_NORM_0_255,"dog-filters.pgm"); 00189 fInd ++; 00190 } //end of oriInd 00191 } //end of filtSize : end of making filters 00192 //**********************************end of making filters 00193 00194 //************* copy img and pad with 0s : expand by maxFS size 00195 Image<float> fExpnd(imgHeight+maxFS, imgWidth+maxFS, NO_INIT); 00196 int xVal = 0; 00197 int yVal = 0; 00198 //fExpnd.resize(imgHeight+maxFS, imgWidth+maxFS, true); 00199 for(int y = 0; y < imgHeight+maxFS; y ++) { 00200 if( y < (int)(maxFS/2) || y >= imgHeight+(int)(maxFS/2)) { 00201 for(int x = 0; x < imgWidth+maxFS; x ++) 00202 fExpnd.setVal(x,y,0.0); 00203 } // end of if 00204 else { 00205 for(int x = 0; x < imgWidth+maxFS; x ++) { 00206 if( x < (int)(maxFS/2) || x >= imgWidth+(int)(maxFS/2)) 00207 fExpnd.setVal(x,y,0.0); 00208 else { 00209 fExpnd.setVal(x,y,fim.getVal(xVal,yVal)); 00210 xVal ++; 00211 } 00212 } 00213 xVal = 0; 00214 yVal ++; 00215 } // end of else 00216 } //end of copy img 00217 00218 //*********************************** S1/C1 00219 int bStart = (int)(maxFS/2); 00220 int currBX = 0; 00221 int currBY = 0; 00222 fInd = 0; 00223 00224 //loop for scales of filters 00225 for(int scaleInd = 0; scaleInd < numScaleBand; scaleInd ++) { 00226 //initialize c1Buff[] 00227 for(int i = 0; i < numScaleBand; i ++) { 00228 c1Buff[i].resize(imgHeight,imgWidth,true); 00229 } 00230 for(int currScale = scaleSS[scaleInd]; 00231 currScale < scaleSS[scaleInd+1]; currScale++) { 00232 for(int oriInd = 0; oriInd < numOri; oriInd ++) { 00233 int fSize = fSiz[numOri * (currScale - 1) + oriInd]; 00234 for(int y = 0; y < imgHeight; y ++) { 00235 for(int x = 0; x < imgWidth; x ++) { 00236 float intRes = 0.0; 00237 float imgLen = 0.0; 00238 currBX = bStart - (int)(fSize/2) + x; 00239 currBY = bStart - (int)(fSize/2) + y; 00240 for(int hig = 0; hig < fSize; hig ++) { 00241 for(int wid = 0; wid < fSize; wid ++) { 00242 imgLen += sqr(fExpnd.getVal(currBX+wid,currBY+hig)); 00243 intRes += fExpnd.getVal(currBX+wid,currBY+hig) 00244 * filters[fInd].getVal(wid+(hig*fSize),0); 00245 } 00246 }// end of a filter 00247 if(angleFlag && (imgLen > 0.0)) intRes /= ::sqrt(imgLen); 00248 //rectify : take the absolute value 00249 if(intRes < 0.0) intRes = -intRes; 00250 if(intRes > c1Buff[oriInd].getVal(x,y)) 00251 c1Buff[oriInd].setVal(x,y,intRes); 00252 } // x 00253 }// end of an img : y 00254 fInd ++; 00255 }//end of orientations of filters 00256 }//end of currScale 00257 00258 //decide pooling range 00259 c1R = (int)ceil((double)poolRange[scaleInd]/c1OL); 00260 int c1BuffInd = 0; 00261 for(int oInd = 0; oInd < numOri; oInd ++) { 00262 //initialize c1Res[]:just for a pooling range size of scale band:4 00263 c1RSize = (int)ceil((double)imgHeight/c1R); 00264 //trick for reverse c1Res order : <3,2,1,0>, <7,6,5,4>, <11,6,5,4> 00265 //and <15,14,13,12> 00266 resImg = (scaleInd * numScaleBand) + 3 - oInd; 00267 c1Res[resImg].resize(c1RSize,c1RSize,true); 00268 resX = resY = 0; 00269 //loop for an c1Buff 00270 for(int y = 0; y < imgHeight; y += c1R) { 00271 for(int x = 0; x < imgWidth; x += c1R) { 00272 //loop for an pooling range 00273 for(int yInPR = y; (yInPR-y < poolRange[scaleInd]) 00274 && (yInPR < imgHeight); yInPR ++) { 00275 for(int xInPR = x; (xInPR-x < poolRange[scaleInd]) 00276 && (xInPR < imgWidth); xInPR ++) { 00277 if(c1Buff[c1BuffInd].getVal(xInPR,yInPR) 00278 > c1Res[resImg].getVal(resX, resY)) 00279 c1Res[resImg].setVal (resX,resY, 00280 c1Buff[c1BuffInd].getVal(xInPR,yInPR)); 00281 } //: xInPR 00282 } //: yInPR 00283 resX ++; 00284 } //: x 00285 resY ++; 00286 resX = 0; 00287 }//end of an c1Buff loop : y 00288 c1BuffInd++; 00289 } 00290 }//end of scales of filters 00291 00292 Image<float> c1All; 00293 c1All = concatArray(c1Res,16,4,64,64); 00294 Raster::VisuFloat(c1All,FLOAT_NORM_0_255,"c1All.pgm"); 00295 00296 //***************************************************** S2/C2 begin 00297 //The result of C1 layer is on c1Res[] 00298 //now for S2 layer : combine C1 results 00299 s2Size = c2Size = 0; 00300 c2x = c2y = 0; 00301 for(int oInd1 = 0; oInd1 < numOri; oInd1++) 00302 for(int oInd2 = 0; oInd2 < numOri; oInd2++) 00303 for(int oInd3 = 0; oInd3 < numOri; oInd3++) 00304 for(int oInd4 = 0; oInd4 < numOri; oInd4++) { 00305 c2Res.setVal(c2x,c2y,-1e10); 00306 res = -1e10; 00307 for(int scaleInd = 0; scaleInd < numScaleBand; scaleInd++) { 00308 c1Size = c1Res[scaleInd * numOri].getHeight(); 00309 for(int y = c1OL; y < c1Size; y ++) { 00310 for(int x = c1OL; x < c1Size; x ++) { 00311 s2Res = ( 00312 sqr(c1Res[(scaleInd*numOri) + 00313 oInd1].getVal(y,x) - s2Target) + 00314 sqr(c1Res[(scaleInd*numOri) + 00315 oInd2].getVal(y,x-c1OL) - s2Target) + 00316 sqr(c1Res[(scaleInd*numOri) + 00317 oInd3].getVal(y-c1OL,x) - s2Target) + 00318 sqr(c1Res[(scaleInd*numOri) + 00319 oInd4].getVal(y-c1OL,x-c1OL) - s2Target)); 00320 if(s2Res > res) res = s2Res; 00321 s2Size ++; 00322 }// end of "x" for c1Size 00323 }// end of "y" for c1Size 00324 if(res > c2Res.getVal(c2x, c2y)) 00325 c2Res.setVal(c2x, c2y, res); 00326 } //end of scaleInd 00327 c2Size++; 00328 if(c2x >= 15) { 00329 c2x = 0; 00330 c2y++; 00331 } 00332 else c2x++; 00333 } //end of oInd4 00334 //Raster::VisuFloat(c2Res, FLOAT_NORM_0_255, "dog-c2Res.pgm"); 00335 00336 //***************************************************** 00337 //learning 00338 float udWt = 0; 00339 Image<float> wtC2Res(16, 16, NO_INIT); 00340 std::cout << "before the sum of ud wt is ... " << sum(wt) << std::endl; 00341 std::cout << "before the mean ud wt is ... " << mean(wt) << std::endl; 00342 00343 //initialize the weight to "1" 00344 for(int y = 0; y < 16; y ++) { 00345 for(int x = 0; x < 16; x ++) { 00346 //std::cout << x << "," << y << " th.." << c2Res.getVal(x, y) << std::endl; 00347 wt.setVal(x,y,1); 00348 //std::cout << x << "," << y << " th.." << wt.getVal(x, y) << std::endl; 00349 } 00350 } 00351 00352 //update the weight 00353 for(int y = 0; y < 16; y ++) { 00354 for(int x = 0; x < 16; x ++) { 00355 if (target == 1) 00356 udWt = wt.getVal(x,y) * (1 + eta*c2Res.getVal(x,y)); 00357 else if (target == 0) 00358 udWt = wt.getVal(x,y) * (1 - eta*c2Res.getVal(x,y)); 00359 wt.setVal(x,y,udWt); 00360 //wtC2Res.setVal(x,y,udWt*c2Res.getVal(x,y)); 00361 //std::cout << x <<"(aUDwt)"<< y <<" th.."<< wt.getVal(x, y) << std::endl; 00362 } 00363 } 00364 00365 //normalize the weight 00366 //wt = wt/mean(wt); 00367 float wVal = 0.0; 00368 for (int y = 0; y < 16; y++) { 00369 for (int x = 0; x < 16; x++) { 00370 if (wVal <= wt.getVal(x,y)) 00371 wVal = wt.getVal(x,y); 00372 } 00373 } 00374 wt = wt/wVal; 00375 /* 00376 //check the normalized/updated weight 00377 for(int y = 0; y < 16; y ++) { 00378 for(int x = 0; x < 16; x ++) { 00379 std::cout << x << "(aNMwt)" << y << " th.." << wt.getVal(x, y) << std::endl; 00380 } 00381 } 00382 */ 00383 //write the result to a file 00384 if (wFile.is_open()) { 00385 wFile << "########## image.." << imgInd << " iteration"<<itr<<std::endl; 00386 wFile << "the sum of ud wt is ... " << sum(wt); 00387 wFile << " the mean ud wt is ... " << mean(wt) << std::endl; 00388 00389 //print out the wight matrix 00390 for(int y = 0; y < 16; y ++) { 00391 //std::cout << "*" << y << "* "; 00392 for(int x = 0; x < 16; x ++) { 00393 wFile << wt.getVal(x, y) << "\t"; 00394 wtC2Res.setVal(x,y,wt.getVal(x,y)*c2Res.getVal(x,y)); 00395 } 00396 wFile << std::endl; 00397 } 00398 wFile << "wt sum..." << sum(wtC2Res) << std::endl; 00399 }//end of file writing 00400 00401 // show the weights: 00402 Image<float> wt2(wt); inplaceNormalize(wt2, 0.0f, 255.0f); 00403 xwi.clear(255); 00404 for (int i = 1; i < 256; i ++) { 00405 Point2D<int> p1(i-1, (int)wt2.getVal(i-1)), p2(i, (int)wt2.getVal(i)); 00406 drawLine(xwi, p1, p2, byte(0)); 00407 } 00408 xww.drawImage(xwi); 00409 00410 }//end of imgInd 00411 }//end of itr 00412 //}//end of tItrInd 00413 wFile.close(); 00414 }//end of main 00415 00416 // ###################################################################### 00417 /* So things look consistent in everyone's emacs... */ 00418 /* Local Variables: */ 00419 /* indent-tabs-mode: nil */ 00420 /* End: */