test-hmax.C

Go to the documentation of this file.
00001 /*!@file HMAX/test-hmax.C This is a test of the C++ hmax object recognition */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/HMAX/test-hmax.C $
00035 // $Id: test-hmax.C 9412 2008-03-10 23:10:15Z farhan $
00036 //
00037 
00038 #include "Channels/Jet.H"
00039 #include "GUI/XWindow.H"
00040 #include "Image/DrawOps.H"
00041 #include "Image/Image.H"
00042 #include "Image/ImageSet.H"
00043 #include "Image/Kernels.H"   // for dogFilterHmax()
00044 #include "Image/MathOps.H"
00045 #include "Image/ShapeOps.H"
00046 #include "Raster/Raster.H"
00047 #include "Util/log.H"
00048 
00049 #include <cmath>
00050 #include <dirent.h>
00051 #include <fstream>
00052 #include <iostream>
00053 #include <string>
00054 #include <vector>
00055 
00056 // show debugging info on memory alloc/free
00057 //#define  ARRAYDATA_DEBUG_ALLOC
00058 
00059 // include class definitions for all classes that define or use
00060 // templates. See MakeObjects for why we do that strange thing!
00061 
00062 inline float sqr(float x) {
00063   return (x * x);
00064 }
00065 
00066 std::vector<std::string> readDir(std::string inName) {
00067   DIR *dp = opendir(inName.c_str());
00068   dirent *dirp;
00069   std::vector<std::string> fList;
00070   while ((dirp = readdir(dp)) != NULL ) {
00071     if (dirp->d_name[0] != '.')
00072       fList.push_back(inName + '/' + std::string(dirp->d_name));
00073   }
00074   std::cout << "# of files ...: " << fList.size() << std::endl;
00075   std::cout << "file list : " << std::endl;
00076   for (unsigned int i=0; i<fList.size(); i++)
00077     std::cout << "\t" << fList[i] << std::endl;
00078   return fList;
00079 }
00080 
00081 // ######################################################################
00082 // ##### Main Program:
00083 // ######################################################################
00084 //! The main routine. Read images, process, display results
00085 int main(int argc, char **argv)
00086 {
00087   float currStd = 1.75;
00088   float stdStep = 0.5;
00089   float currOri = 0.0;
00090   float oriStep = 45.0;
00091   int numOri = 4;
00092   int numScale = 12;
00093   int numScaleBand = 4;
00094   int minFS = 7;
00095   int maxFS = 29;
00096   int sSFS = 2;
00097   int fieldSize = 0;
00098   int cropBegin, cropEnd;
00099   int sizeOfPR = 4; // size for spatial pooling range in c1
00100   int poolRange[] = {4, 6, 9, 12};
00101   int scaleSS[] = {1, 3, 6, 9, 13};
00102   int fSiz[numScale*numScaleBand];
00103   int c1OL = 2; // define the ovelap of pooling range in c1
00104   int c1R = 0; //define c1's abstraction from s1 : receptive range
00105   int imgHeight = 128;
00106   int imgWidth = 128;
00107   int c1RSize = 0;
00108   int resImg, resX, resY; // index of c1Res[]
00109   int c1Size, c2Size, s2Size;
00110   int c2x, c2y;
00111   float res = 0;
00112   float s2Res = 0;
00113   int angleFlag = 1;
00114   int s2Target = 0; //the mean of the gaussian: center
00115   int target = 0;
00116   int tItr[] = {2, 600, 1200};
00117 
00118   // ########## parse options:
00119   char imname[1024]; imname[0] = '\0';
00120   strncpy(imname, argv[1], 1023);
00121   target = atoi(argv[2]);
00122   std::string inName = imname;
00123 
00124   std::vector<std::string> fileList = readDir(inName);
00125 
00126   // ########## file for restore the result
00127   std::ofstream wFile ("weightFile");
00128 
00129   // weighting for IT
00130   float eta = 0.2;
00131   Image<float> wt(16, 16, NO_INIT);
00132 
00133   // buffer for intermediate c1 response
00134   Image<float> c1Buff[numScaleBand];
00135   // c1 result : 4 * 4 =16 features
00136   Image<float> c1Res[numOri*sizeOfPR];
00137   // c2 result : a 16 * 16 = 256 pixels image
00138   Image<float> c2Res(16, 16, NO_INIT);
00139   Image<byte> im;
00140   Image<float> filters[numOri*numScale];
00141   Image<float> tmpFilt;
00142 
00143   // display weights:
00144   XWindow xww(Dims(256, 256), -1, -1, "C2->IT Weights");
00145   Image<byte> xwi(256, 256, NO_INIT);
00146   XWindow xwimg(Dims(imgWidth, imgHeight), -1, -1, "Original Image");
00147 
00148   //for(unsigned int tItrInd = 0; tItrInd < 3; tItrInd++) {
00149   for(int itr=0; itr < tItr[0]; itr++) {
00150     for(unsigned int imgInd=0; imgInd<fileList.size(); imgInd++) {
00151       int fInd = 0;
00152       std::cout << "**********img..." << imgInd << std::endl;
00153       std::cout << "target..." << target << std::endl;
00154 
00155       //read input Image and convert to float
00156       im = Raster::ReadGray(fileList[imgInd], RASFMT_PNM);
00157       std::cout << "the size of image...im...." << im.getSize() << std::endl;
00158       rescale(im, imgWidth, imgHeight);
00159 
00160       xwimg.drawImage(im);
00161 
00162       Image<float> fim = im;
00163       //Raster::VisuFloat(fim,FLOAT_NORM_0_255,"float-image.pgm");
00164 
00165       //***************************************************
00166       //making filters : numOri(4) * numScale(12) = 48 filters
00167       for(int filtSize=minFS; filtSize<=maxFS; filtSize+=sSFS) {
00168         fieldSize = (int)(filtSize * ::sqrt(2) + 1);
00169         fieldSize = fieldSize + 1 + (fieldSize % 2);
00170         cropBegin = (int)((fieldSize-filtSize) / 2) + 1;
00171         cropEnd = cropBegin + filtSize;
00172         currStd += stdStep;
00173         currOri = 0;
00174         for(int oriInd = 0; oriInd < numOri; oriInd ++) {
00175           tmpFilt.resize(fieldSize,fieldSize,true);
00176           filters[fInd].resize(maxFS*maxFS,1,true);
00177           currOri += oriStep;
00178           tmpFilt = dogFilterHmax<float>(currStd,currOri,cropBegin,cropEnd);
00179 
00180           fSiz[fInd] = tmpFilt.getHeight();
00181           int filtElm = 0;
00182           for(int y = 0; y < filtSize; y++) {
00183             for(int x = 0; x < filtSize; x ++) {
00184               filters[fInd].setVal(filtElm,0,(tmpFilt.getVal(x,y)));
00185               filtElm++;
00186             }
00187           }
00188           //Raster::VisuFloat(filters[fInd],FLOAT_NORM_0_255,"dog-filters.pgm");
00189           fInd ++;
00190         }       //end of oriInd
00191       }       //end of filtSize : end of making filters
00192       //**********************************end of making filters
00193 
00194       //*************  copy img and pad with 0s : expand by maxFS size
00195       Image<float> fExpnd(imgHeight+maxFS, imgWidth+maxFS, NO_INIT);
00196       int xVal = 0;
00197       int yVal = 0;
00198       //fExpnd.resize(imgHeight+maxFS, imgWidth+maxFS, true);
00199       for(int y = 0; y < imgHeight+maxFS; y ++) {
00200         if( y < (int)(maxFS/2) || y >= imgHeight+(int)(maxFS/2)) {
00201           for(int x = 0; x < imgWidth+maxFS; x ++)
00202             fExpnd.setVal(x,y,0.0);
00203         } // end of if
00204         else {
00205           for(int x = 0; x < imgWidth+maxFS; x ++) {
00206             if( x < (int)(maxFS/2) || x >= imgWidth+(int)(maxFS/2))
00207               fExpnd.setVal(x,y,0.0);
00208             else {
00209               fExpnd.setVal(x,y,fim.getVal(xVal,yVal));
00210               xVal ++;
00211             }
00212           }
00213           xVal = 0;
00214           yVal ++;
00215         } // end of else
00216       } //end of copy img
00217 
00218       //*********************************** S1/C1
00219       int bStart = (int)(maxFS/2);
00220       int currBX = 0;
00221       int currBY = 0;
00222       fInd = 0;
00223 
00224       //loop for scales of filters
00225       for(int scaleInd = 0; scaleInd < numScaleBand; scaleInd ++) {
00226         //initialize c1Buff[]
00227         for(int i = 0; i < numScaleBand; i ++) {
00228           c1Buff[i].resize(imgHeight,imgWidth,true);
00229         }
00230         for(int currScale = scaleSS[scaleInd];
00231             currScale < scaleSS[scaleInd+1]; currScale++) {
00232           for(int oriInd = 0; oriInd < numOri; oriInd ++) {
00233             int fSize = fSiz[numOri * (currScale - 1) + oriInd];
00234             for(int y = 0; y < imgHeight; y ++) {
00235               for(int x = 0; x < imgWidth; x ++) {
00236                 float intRes = 0.0;
00237                 float imgLen = 0.0;
00238                 currBX = bStart - (int)(fSize/2) + x;
00239                 currBY = bStart - (int)(fSize/2) + y;
00240                 for(int hig = 0; hig < fSize; hig ++) {
00241                   for(int wid = 0; wid < fSize; wid ++) {
00242                     imgLen += sqr(fExpnd.getVal(currBX+wid,currBY+hig));
00243                     intRes += fExpnd.getVal(currBX+wid,currBY+hig)
00244                       * filters[fInd].getVal(wid+(hig*fSize),0);
00245                   }
00246                 }// end of a filter
00247                 if(angleFlag && (imgLen > 0.0)) intRes /= ::sqrt(imgLen);
00248                 //rectify : take the absolute value
00249                 if(intRes < 0.0) intRes = -intRes;
00250                 if(intRes > c1Buff[oriInd].getVal(x,y))
00251                   c1Buff[oriInd].setVal(x,y,intRes);
00252               } // x
00253             }// end of an img : y
00254             fInd ++;
00255           }//end of orientations of filters
00256         }//end of currScale
00257 
00258         //decide pooling range
00259         c1R = (int)ceil((double)poolRange[scaleInd]/c1OL);
00260         int c1BuffInd = 0;
00261         for(int oInd = 0; oInd < numOri; oInd ++) {
00262           //initialize c1Res[]:just for a pooling range size of scale band:4
00263           c1RSize = (int)ceil((double)imgHeight/c1R);
00264           //trick for reverse c1Res order : <3,2,1,0>, <7,6,5,4>, <11,6,5,4>
00265           //and <15,14,13,12>
00266           resImg = (scaleInd * numScaleBand) + 3 - oInd;
00267           c1Res[resImg].resize(c1RSize,c1RSize,true);
00268           resX = resY = 0;
00269           //loop for an c1Buff
00270           for(int y = 0; y < imgHeight; y += c1R) {
00271             for(int x = 0; x < imgWidth; x += c1R) {
00272               //loop for an pooling range
00273               for(int yInPR = y; (yInPR-y < poolRange[scaleInd])
00274                     && (yInPR < imgHeight); yInPR ++) {
00275                 for(int xInPR = x; (xInPR-x < poolRange[scaleInd])
00276                       && (xInPR < imgWidth); xInPR ++) {
00277                   if(c1Buff[c1BuffInd].getVal(xInPR,yInPR)
00278                      > c1Res[resImg].getVal(resX, resY))
00279                     c1Res[resImg].setVal (resX,resY,
00280                                           c1Buff[c1BuffInd].getVal(xInPR,yInPR));
00281                 } //: xInPR
00282               } //: yInPR
00283               resX ++;
00284             } //: x
00285             resY ++;
00286             resX = 0;
00287           }//end of an c1Buff loop : y
00288           c1BuffInd++;
00289         }
00290       }//end of scales of filters
00291 
00292       Image<float> c1All;
00293       c1All = concatArray(c1Res,16,4,64,64);
00294       Raster::VisuFloat(c1All,FLOAT_NORM_0_255,"c1All.pgm");
00295 
00296       //***************************************************** S2/C2 begin
00297       //The result of C1 layer is on c1Res[]
00298       //now for S2 layer : combine C1 results
00299       s2Size = c2Size = 0;
00300       c2x = c2y = 0;
00301       for(int oInd1 = 0; oInd1 < numOri; oInd1++)
00302         for(int oInd2 = 0; oInd2 < numOri; oInd2++)
00303           for(int oInd3 = 0; oInd3 < numOri; oInd3++)
00304             for(int oInd4 = 0; oInd4 < numOri; oInd4++) {
00305               c2Res.setVal(c2x,c2y,-1e10);
00306               res = -1e10;
00307               for(int scaleInd = 0; scaleInd < numScaleBand; scaleInd++) {
00308                 c1Size = c1Res[scaleInd * numOri].getHeight();
00309                 for(int y = c1OL; y < c1Size; y ++) {
00310                   for(int x = c1OL; x < c1Size; x ++) {
00311                     s2Res = (
00312                              sqr(c1Res[(scaleInd*numOri) +
00313                                        oInd1].getVal(y,x) - s2Target) +
00314                              sqr(c1Res[(scaleInd*numOri) +
00315                                        oInd2].getVal(y,x-c1OL) - s2Target) +
00316                              sqr(c1Res[(scaleInd*numOri) +
00317                                        oInd3].getVal(y-c1OL,x) - s2Target) +
00318                              sqr(c1Res[(scaleInd*numOri) +
00319                                        oInd4].getVal(y-c1OL,x-c1OL) - s2Target));
00320                     if(s2Res > res)  res = s2Res;
00321                     s2Size ++;
00322                   }// end of "x" for c1Size
00323                 }// end of "y" for c1Size
00324                 if(res > c2Res.getVal(c2x, c2y))
00325                   c2Res.setVal(c2x, c2y, res);
00326               } //end of scaleInd
00327               c2Size++;
00328               if(c2x >= 15) {
00329                 c2x = 0;
00330                 c2y++;
00331               }
00332               else c2x++;
00333             } //end of oInd4
00334       //Raster::VisuFloat(c2Res, FLOAT_NORM_0_255, "dog-c2Res.pgm");
00335 
00336       //*****************************************************
00337       //learning
00338       float udWt = 0;
00339       Image<float> wtC2Res(16, 16, NO_INIT);
00340       std::cout << "before the sum of ud wt is ... " << sum(wt) << std::endl;
00341       std::cout << "before the mean ud wt is ... " << mean(wt) << std::endl;
00342 
00343       //initialize the weight to "1"
00344       for(int y = 0; y < 16; y ++) {
00345         for(int x = 0; x < 16; x ++) {
00346           //std::cout << x << "," << y << " th.." << c2Res.getVal(x, y) << std::endl;
00347           wt.setVal(x,y,1);
00348           //std::cout << x << "," << y << " th.." << wt.getVal(x, y) << std::endl;
00349         }
00350       }
00351 
00352       //update the weight
00353       for(int y = 0; y < 16; y ++) {
00354         for(int x = 0; x < 16; x ++) {
00355           if (target == 1)
00356             udWt = wt.getVal(x,y) * (1 + eta*c2Res.getVal(x,y));
00357           else if (target == 0)
00358             udWt = wt.getVal(x,y) * (1 - eta*c2Res.getVal(x,y));
00359           wt.setVal(x,y,udWt);
00360           //wtC2Res.setVal(x,y,udWt*c2Res.getVal(x,y));
00361           //std::cout << x <<"(aUDwt)"<< y <<" th.."<< wt.getVal(x, y) << std::endl;
00362         }
00363       }
00364 
00365       //normalize the weight
00366       //wt = wt/mean(wt);
00367       float wVal = 0.0;
00368       for (int y = 0; y < 16; y++) {
00369         for (int x = 0; x < 16; x++) {
00370           if (wVal <= wt.getVal(x,y))
00371             wVal = wt.getVal(x,y);
00372         }
00373       }
00374       wt = wt/wVal;
00375 /*
00376       //check the normalized/updated weight
00377       for(int y = 0; y < 16; y ++) {
00378         for(int x = 0; x < 16; x ++) {
00379           std::cout << x << "(aNMwt)" << y << " th.." << wt.getVal(x, y) << std::endl;
00380         }
00381       }
00382 */
00383       //write the result to a file
00384       if (wFile.is_open()) {
00385         wFile << "########## image.." << imgInd << " iteration"<<itr<<std::endl;
00386         wFile << "the sum of ud wt is ... " << sum(wt);
00387         wFile << "   the mean ud wt is ... " << mean(wt) << std::endl;
00388 
00389         //print out the wight matrix
00390         for(int y = 0; y < 16; y ++) {
00391           //std::cout << "*" << y << "* ";
00392           for(int x = 0; x < 16; x ++) {
00393             wFile << wt.getVal(x, y) << "\t";
00394             wtC2Res.setVal(x,y,wt.getVal(x,y)*c2Res.getVal(x,y));
00395           }
00396           wFile << std::endl;
00397         }
00398         wFile << "wt sum..." << sum(wtC2Res) << std::endl;
00399       }//end of file writing
00400 
00401       // show the weights:
00402       Image<float> wt2(wt); inplaceNormalize(wt2, 0.0f, 255.0f);
00403       xwi.clear(255);
00404       for (int i = 1; i < 256; i ++) {
00405         Point2D<int> p1(i-1, (int)wt2.getVal(i-1)), p2(i, (int)wt2.getVal(i));
00406         drawLine(xwi, p1, p2, byte(0));
00407       }
00408       xww.drawImage(xwi);
00409 
00410     }//end of imgInd
00411   }//end of itr
00412   //}//end of tItrInd
00413   wFile.close();
00414 }//end of main
00415 
00416 // ######################################################################
00417 /* So things look consistent in everyone's emacs... */
00418 /* Local Variables: */
00419 /* indent-tabs-mode: nil */
00420 /* End: */