00001 /*!@file Neuro/InferoTemporalHmax.C Object recognition module with Hmax */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/InferoTemporalHmax.C $ 00035 // $Id: InferoTemporalHmax.C 14157 2010-10-22 00:54:14Z dparks $ 00036 // 00037 00038 #include "Neuro/InferoTemporalHmax.H" 00039 #include "HMAX/HmaxFL.H" 00040 #include "Learn/SVMClassifier.H" 00041 #include "Component/OptionManager.H" 00042 #include "Component/ModelOptionDef.H" 00043 #include "Image/MathOps.H" 00044 #include "Image/ShapeOps.H" 00045 #include "Image/CutPaste.H" 00046 #include "Image/ColorOps.H" 00047 #include "Neuro/NeuroOpts.H" 00048 #include "Neuro/NeuroSimEvents.H" 00049 #include "Neuro/Brain.H" 00050 #include "Neuro/VisualCortex.H" 00051 #include "Simulation/SimEventQueue.H" 00052 #include "Media/MediaSimEvents.H" 00053 00054 #include <cstdlib> 00055 #include <iostream> 00056 #include <iomanip> 00057 #include <fstream> 00058 00059 00060 const ModelOptionDef OPT_ITHMAXC1PatchesDir = 00061 { MODOPT_ARG_STRING, "ITC feature patches dir", &MOC_ITC, OPTEXP_CORE, 00062 "Directory of ordered patch files named C1Patches.<PATCH_SIZES>.<NUM_PATCHES_PER_SIZE>.<PATCH_ORIENTATIONS>.pnm, " 00063 "where PATCH_SIZES iterates over the number of patch scale sizes (all patches are square), " 00064 "where NUM_PATCHES_PER_SIZE iterates over the number of patches for each size, " 00065 "where PATCH_ORIENTATIONS iterates over the number of orientations, " 00066 "each iterator goes from 0 to X-1", 00067 "it-hmax-c1patches-dir", '\0', "<dirname>", "" }; 00068 00069 00070 const ModelOptionDef OPT_ITHMAXFeatureVectorFileName = 00071 { MODOPT_ARG_STRING, "ITC HMAX Feature Vector File Name", &MOC_ITC, OPTEXP_CORE, 00072 "Output the feature vectors with their ids into a file", 00073 "it-hmax-feature-vector-filename", '\0', "<filename>", "" }; 00074 00075 00076 const ModelOptionDef OPT_ITHMAXDisableClassifier = 00077 { MODOPT_FLAG, "ITC HMAX Classifier Disable", &MOC_ITC, OPTEXP_CORE, 00078 "Disable the internal classifier module inside of the ITHmax class, designed to be used when system has an external" 00079 "classifier to avoid conflicts", 00080 "it-hmax-disable-classifier", '\0', "<boolean>", "false" }; 00081 00082 00083 00084 // ###################################################################### 00085 namespace 00086 { 00087 Image<PixRGB<byte> > getCroppedObject(const Image<PixRGB<byte> >& scene, 00088 const Image<float>& smoothMask) 00089 { 00090 if (!scene.initialized()) 00091 return Image<PixRGB<byte> >(); 00092 00093 if (!smoothMask.initialized()) 00094 return Image<PixRGB<byte> >(); 00095 00096 const float threshold = 1.0f; 00097 00098 const Rectangle r = findBoundingRect(smoothMask, threshold); 00099 return crop(scene, r); 00100 } 00101 } 00102 00103 00104 // ###################################################################### 00105 InferoTemporalHmax::InferoTemporalHmax(OptionManager& mgr, 00106 const std::string& descrName, 00107 const std::string& tagName) : 00108 InferoTemporal(mgr, descrName, tagName), 00109 itsHMAXStoredPatchesDir(&OPT_ITHMAXC1PatchesDir, this), 00110 itsHMAXFeatureVectorFileName(&OPT_ITHMAXFeatureVectorFileName, this), 00111 itsHMAXDisableClassifier(&OPT_ITHMAXDisableClassifier,this), 00112 itsClassifier(NULL) 00113 { 00114 } 00115 00116 // ###################################################################### 00117 void InferoTemporalHmax::start1() 00118 { 00119 // Initialize hmax with feature learning 00120 std::vector<int> scss(9); 00121 scss[0] = 1; scss[1] = 3; scss[2] = 5; scss[3] = 7; scss[4] = 9; 00122 scss[5] = 11; scss[6] = 13; scss[7] = 15; scss[8] = 17; 00123 std::vector<int> spss(8); 00124 spss[0] = 8; spss[1] = 10; spss[2] = 12; spss[3] = 14; 00125 spss[4] = 16; spss[5] = 18; spss[6] = 20; spss[7] = 22; 00126 int nori = 4; 00127 00128 if(itsHMAXStoredPatchesDir.getVal().compare("") == 0) { 00129 LFATAL("Must specify directory containing C1 Patches using --it-hmax-c1patches-dir"); 00130 } 00131 00132 if(!itsHMAXDisableClassifier.getVal()) 00133 { 00134 itsClassifier = rutz::shared_ptr<SVMClassifierModule>(new SVMClassifierModule(getManager(),"","")); 00135 } 00136 else 00137 { 00138 printf("Did not initialize classifier inside of hmax\n"); 00139 } 00140 00141 00142 00143 // Initialize hmax 00144 itsHmax.init(nori,spss,scss); 00145 // Load the patches 00146 itsHmax.readInC1Patches(itsHMAXStoredPatchesDir.getVal()); 00147 00148 00149 // Clear out the feature vector file if desired 00150 // if(itsHMAXFeatureVectorFileName.getVal().compare("") != 0) { 00151 // std::ofstream c2File; 00152 // c2File.open(itsHMAXFeatureVectorFileName.getVal().c_str(),std::ios::out); 00153 // c2File.close(); 00154 // } 00155 00156 00157 InferoTemporal::start1(); 00158 } 00159 00160 // ###################################################################### 00161 void InferoTemporalHmax::stop1() 00162 { 00163 if(!itsHMAXDisableClassifier.getVal()) 00164 { 00165 itsClassifier = rutz::shared_ptr<SVMClassifierModule>(NULL); 00166 } 00167 } 00168 00169 // ###################################################################### 00170 InferoTemporalHmax::~InferoTemporalHmax() 00171 {} 00172 00173 // ###################################################################### 00174 void InferoTemporalHmax::attentionShift(SimEventQueue& q, 00175 const Point2D<int>& location) 00176 { 00177 int id = -1; 00178 std::string name = ""; 00179 Image<float> inputf; 00180 Image<PixRGB<float> > objImg; 00181 00182 if(!itsClassifier.is_valid()) 00183 LFATAL("Classifier was disabled, so attention shift is not supported"); 00184 00185 // get the lastest input frame from the retina: 00186 if (SeC<SimEventRetinaImage> e = q.check<SimEventRetinaImage>(this)) 00187 objImg = e->frame().colorByte(); 00188 else 00189 LFATAL("Oooops, no input frame in the event queue?"); 00190 00191 // get the latest smooth mask from the shape estimator: 00192 Image<float> smoothMask; 00193 if (SeC<SimEventShapeEstimatorOutput> 00194 e = q.check<SimEventShapeEstimatorOutput>(this)) 00195 { 00196 smoothMask = e->smoothMask(); 00197 // crop around object using mask? 00198 //if (itsUseAttention.getVal()) 00199 objImg = getCroppedObject(objImg, smoothMask); 00200 } 00201 if (!objImg.initialized()) return; // no object image, so just do nothing 00202 00203 // Convert color image to grayscale using NTSC coordinates 00204 inputf = luminanceNTSC(objImg); 00205 // Pull the object data, if any 00206 if(itsClassifier->getMode().compare("Train") == 0) 00207 { 00208 SVMObject so; 00209 if(SeC<SimEventObjectDescription> d = q.check<SimEventObjectDescription>(this)) { 00210 rutz::shared_ptr<TestImages::ObjData> objData = d->getObjData(); 00211 // Extract the id out of the object data 00212 id = objData->id; 00213 name = objData->name; 00214 } 00215 } 00216 // Calculate feature vector 00217 std::vector<float> featureVector = calculateFeatureVector(inputf); 00218 // Call classifier to determine label from feature vector 00219 SVMObject so = itsClassifier->determineLabel(featureVector,id,name); 00220 00221 // Post the object 00222 if (itsClassifier->getMode().compare("Rec") == 0) // Recognition 00223 { 00224 rutz::shared_ptr<TestImages::ObjData> objData(new TestImages::ObjData); 00225 objData->id = so.id; 00226 if(so.initialized()) 00227 objData->name = so.name; 00228 else 00229 objData->name = ""; 00230 objData->maxProb = so.confidence; //FIX ME! 00231 objData->normProb = so.confidence; //FIX ME! 00232 00233 LINFO("OBJECT RECOGNITION: Object identified as %s[%d]\n",objData->name.c_str(),objData->id); 00234 00235 rutz::shared_ptr<SimEventObjectDescription> 00236 objDataEvent(new SimEventObjectDescription(this, objData)); 00237 00238 q.post(objDataEvent); 00239 } 00240 } 00241 00242 std::vector<float> InferoTemporalHmax::calculateFeatureVector(Image<float> img) 00243 { 00244 // Allocate memory for c2 layer feature values 00245 float ** c2Res = _createFeatureVector(); 00246 itsHmax.getC2(img,c2Res); 00247 std::vector<float> ret = _convertFeatureVector(c2Res); 00248 _freeFeatureVector(c2Res); 00249 00250 return ret; 00251 } 00252 00253 void InferoTemporalHmax::writeOutFeatureVector(std::vector<float> featureVector, int id) 00254 { 00255 std::ofstream c2File; 00256 c2File.open(itsHMAXFeatureVectorFileName.getVal().c_str(),std::ios::app); 00257 if (c2File.is_open()) { 00258 c2File << id << " "; 00259 for(uint i=0;i<featureVector.size();i++) { 00260 c2File << std::setiosflags(std::ios::fixed) << std::setprecision(4) << 00261 (i+1) << ":" << featureVector[i] << " "; 00262 } 00263 c2File << std::endl; 00264 } 00265 c2File.close(); 00266 } 00267 00268 float** InferoTemporalHmax::_createFeatureVector() 00269 { 00270 // Allocate memory for c2 layer feature values 00271 uint c2dim1 = itsHmax.getC1PatchSizes().size(); 00272 uint c2dim2 = itsHmax.getC1PatchesPerSize(); 00273 float **c2Res = new float*[c2dim1]; 00274 for(unsigned int i=0;i<c2dim1;i++) { 00275 c2Res[i] = new float[c2dim2]; 00276 } 00277 return c2Res; 00278 } 00279 00280 std::vector<float> InferoTemporalHmax::_convertFeatureVector(float **c2Res) 00281 { 00282 std::vector<float> ret; 00283 // Allocate memory for c2 layer feature values 00284 uint c2dim1 = itsHmax.getC1PatchSizes().size(); 00285 uint c2dim2 = itsHmax.getC1PatchesPerSize(); 00286 for(unsigned int i=0;i<c2dim1;i++) { 00287 for(unsigned int j=0;j<c2dim2;j++) { 00288 ret.push_back(c2Res[i][j]); 00289 } 00290 } 00291 return ret; 00292 } 00293 00294 void InferoTemporalHmax::_freeFeatureVector(float **c2Res) 00295 { 00296 uint c2dim1 = itsHmax.getC1PatchSizes().size(); 00297 for(unsigned int i=0;i<c2dim1;i++) { 00298 delete[] c2Res[i]; 00299 } 00300 delete [] c2Res; 00301 } 00302 00303 00304 SVMObject InferoTemporalHmax::determineLabel(Image<float> objImg, int id, std::string name) 00305 { 00306 SVMObject so; 00307 //extract features 00308 std::vector<float> featureVector=calculateFeatureVector(objImg); 00309 so=itsClassifier->determineLabel(featureVector,id,name); 00310 // Write out the data to a feature vector file if desired 00311 if(itsHMAXFeatureVectorFileName.getVal().compare("") != 0) { 00312 writeOutFeatureVector(featureVector,so.id); 00313 } 00314 return so; 00315 } 00316 00317 00318 00319 00320 00321 // ###################################################################### 00322 /* So things look consistent in everyone's emacs... */ 00323 /* Local Variables: */ 00324 /* indent-tabs-mode: nil */ 00325 /* End: */ 00326