00001 /*!@file Neuro/InferoTemporalSIFT.C Object recognition module with SIFT */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Sophie Marat 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/InferoTemporalSIFT.C $ 00035 // $Id: InferoTemporalSIFT.C 14244 2010-11-17 02:56:14Z sophie $ 00036 // 00037 00038 #include "Neuro/InferoTemporalSIFT.H" 00039 00040 #include "Component/OptionManager.H" 00041 #include "Component/ModelOptionDef.H" 00042 #include "Neuro/NeuroOpts.H" 00043 #include "Neuro/NeuroSimEvents.H" 00044 #include "Neuro/Brain.H" 00045 #include "Neuro/VisualCortex.H" 00046 #include "Neuro/ShapeEstimator.H" 00047 #include "Simulation/SimEventQueue.H" 00048 #include "Media/MediaSimEvents.H" 00049 #include "SIFT/Keypoint.H" 00050 #include "SIFT/VisualObject.H" 00051 #include "SIFT/VisualObjectDB.H" 00052 #include "Image/MathOps.H" 00053 #include "Image/ShapeOps.H" 00054 #include "Image/CutPaste.H" 00055 #include "Image/ColorOps.H" 00056 #include "Raster/Raster.H" 00057 #include "Image/DrawOps.H" 00058 #include "GUI/XWindow.H" 00059 #include "Channels/ChannelMaps.H" 00060 #include "Channels/ChannelOpts.H" 00061 #include "Channels/ChannelBase.H" 00062 #include "Channels/RawVisualCortex.H" 00063 #include "Component/ModelManager.H" 00064 #include "Channels/IntensityChannel.H" 00065 #include "Channels/ColorChannel.H" 00066 #include "Channels/OrientationChannel.H" 00067 #include "Channels/ChannelMaps.H" 00068 #include "Util/FileUtil.H" 00069 #include "Util/log.H" 00070 #include "Util/sformat.H" 00071 #include "Util/StringUtil.H" 00072 #include "Raster/Raster.H" 00073 00074 #include "Image/Conversions.H" 00075 00076 #include "Neuro/GistEstimatorStd.H" 00077 #include "Component/ModelManager.H" 00078 #include "Channels/ChannelMaps.H" 00079 #include "GUI/XWinManaged.H" 00080 #include "Image/FilterOps.H" 00081 #include "Image/MathOps.H" 00082 #include "Image/ShapeOps.H" 00083 #include "Neuro/gistParams.H" 00084 #include "Raster/Raster.H" 00085 #include "Simulation/SimEvents.H" 00086 #include "Channels/SingleChannel.H" 00087 #include "Util/Timer.H" 00088 00089 #include "Component/RawGistEstimatorStd.H" 00090 00091 #include "Learn/SVMClassifier.H" 00092 #include "Neuro/SVMClassifierModule.H" 00093 00094 00095 #include <cstdlib> 00096 #include <iostream> 00097 #include <iomanip> 00098 #include <fstream> 00099 #include <cmath> 00100 #include <sstream> 00101 #include <dirent.h> 00102 00103 00104 const ModelOptionDef OPT_ITSIFTDatabase = 00105 { MODOPT_ARG_STRING, "ITC Database dir", &MOC_ITC, OPTEXP_CORE, 00106 "Directory of the SIFT Database", 00107 "it-SIFT-database-dir", '\0', "<filename>", " "}; 00108 const ModelOptionDef OPT_ITCMode = 00109 { MODOPT_ARG_STRING, "ITC Mode", &MOC_ITC, OPTEXP_CORE, 00110 "The mode of the ITC. Train: is for training from some data, Test is for recognition of the objects.", "it-mode", '\0', "<Train|Test>", ""}; 00111 const ModelOptionDef OPT_ITPathMatch = 00112 { MODOPT_ARG_STRING, "ITC Path Match", &MOC_ITC, OPTEXP_CORE, 00113 "The path of the objects to consider for matching", 00114 "it-SIFT-path-match", '\0', "<filename>", " "}; 00115 const ModelOptionDef OPT_ITCoarseReco = 00116 { MODOPT_ARG(bool), "Use Gist Coarse Reco", &MOC_ITC, OPTEXP_CORE, 00117 "Use gist to do a coarse pre-recognition", 00118 "it-gist-reco", '\0', "<true|false>", "false" }; 00119 00120 const ModelOptionDef OPT_ITSVMTrain = 00121 { MODOPT_ARG_STRING, "ITC SVM Train file save", &MOC_ITC, OPTEXP_CORE, 00122 "The filename where the training exemple for the SVM will be saved", 00123 "it-SVM-train-file", '\0', "<filename>", " "}; 00124 const ModelOptionDef OPT_ITSVMId = 00125 { MODOPT_ARG_STRING, "ITC Id for the SVM trainning", &MOC_ITC, OPTEXP_CORE, 00126 "the Id of the object trained on", 00127 "it-SVM-id", '\0', "<int>", ""}; 00128 const ModelOptionDef OPT_ITSVMClass = 00129 { MODOPT_ARG_STRING, "ITC Class for the SVM trainning", &MOC_ITC, OPTEXP_CORE, 00130 "the class of the object trained on, Used to complete the table Id:Class", 00131 "it-SVM-class", '\0', "<name>", " "}; 00132 const ModelOptionDef OPT_ITSVMModel = 00133 { MODOPT_ARG_STRING, "ITC SVM Model", &MOC_ITC, OPTEXP_CORE, 00134 "The SVM model to use for recognition", 00135 "it-SVM-model", '\0', "<filename>", " "}; 00136 const ModelOptionDef OPT_ITSVMRange = 00137 { MODOPT_ARG_STRING, "ITC SVM Range", &MOC_ITC, OPTEXP_CORE, 00138 "The range of the SVM model to rescale data before recognition", 00139 "it-SVM-range", '\0', "<filename>", " "}; 00140 const ModelOptionDef OPT_ITSObjNameSVM = 00141 { MODOPT_ARG_STRING, "ITC Obj Name", &MOC_ITC, OPTEXP_CORE, 00142 "The name of the object process for training svm", 00143 "it-SVM-obj-name", '\0', "<name>", " "}; 00144 const ModelOptionDef OPT_ITSFileRecoSave = 00145 { MODOPT_ARG_STRING, "ITC save reco res", &MOC_ITC, OPTEXP_CORE, 00146 "The file where the result of the recognition will be saved", 00147 "it-reco-save-file", '\0', "<filename>", " "}; 00148 00149 const ModelOptionDef OPT_ITSTable = 00150 { MODOPT_ARG_STRING, "ITC save table", &MOC_ITC, OPTEXP_CORE, 00151 "Table of the different class and their Id", 00152 "it-table", '\0', "<filename>", " "}; 00153 00154 00155 // ###################################################################### 00156 namespace 00157 { 00158 Image<PixRGB<byte> > getCroppedObject(const Image<PixRGB<byte> >& scene, 00159 const Image<float>& smoothMask) 00160 { 00161 if (!scene.initialized()) 00162 return Image<PixRGB<byte> >(); 00163 if (!smoothMask.initialized()) 00164 return Image<PixRGB<byte> >(); 00165 const float threshold = 1.0f; 00166 const Rectangle r = findBoundingRect(smoothMask, threshold); 00167 return crop(scene, r); 00168 } 00169 00170 Image<PixRGB<byte> > getCroppedObjectGist(const Image<PixRGB<byte> >& scene, 00171 const Image<float>& smoothMask) 00172 { 00173 if (!scene.initialized()) 00174 return Image<PixRGB<byte> >(); 00175 if (!smoothMask.initialized()) 00176 return Image<PixRGB<byte> >(); 00177 const float threshold = 1.0f; 00178 Dims dimGist(256, 256); 00179 const Rectangle r = findBoundingRect(smoothMask, threshold); 00180 Point2D<int> rCenter = r.center(); 00181 Rectangle rGist = Rectangle::centerDims(rCenter, dimGist); 00182 Image<PixRGB<byte> > imCrop; 00183 if (scene.rectangleOk(rGist)) 00184 { 00185 imCrop = crop(scene, rGist); 00186 LINFO("-----------------------------the rectangle fits the image------------------------"); 00187 LINFO("-------the image crop dim are W=%d, H=%d------------", imCrop.getWidth(), imCrop.getHeight()); 00188 } 00189 else 00190 { 00191 int cx = rCenter.i; 00192 int cy = rCenter.j; 00193 int ttop = std::max(0, cy -dimGist.min()/2); 00194 int lleft = std::max(0, cx - dimGist.min()/2); 00195 int bbot = std::min(scene.getHeight() - 1, cy + dimGist.min()/2); 00196 int rright = std::min(scene.getWidth() - 1, cx + dimGist.min()/2); 00197 Rectangle rGistcrop = Rectangle::tlbrI(ttop, lleft, bbot, rright); 00198 imCrop = crop(scene, rGistcrop); 00199 LINFO("-------the rectangle is too large, computing gist around object on image------------"); 00200 LINFO("-------the image crop dim are W=%d, H=%d------------", imCrop.getWidth(), imCrop.getHeight()); 00201 } 00202 Image<PixRGB<byte> > imCropr; 00203 imCropr = rescaleBilinear(imCrop, 256, 256); 00204 return imCropr; 00205 } 00206 } 00207 00208 // ###################################################################### 00209 00210 InferoTemporalSIFT::InferoTemporalSIFT(OptionManager& mgr, 00211 const std::string& descrName, 00212 const std::string& tagName) : 00213 InferoTemporal(mgr, descrName, tagName), 00214 itsSIFTStoredDatabase(&OPT_ITSIFTDatabase, this), 00215 itsITCMode(&OPT_ITCMode, this), 00216 itsPathMatch(&OPT_ITPathMatch, this), 00217 itsCoarseReco(&OPT_ITCoarseReco, this), 00218 itsTrainSVM(&OPT_ITSVMTrain, this), 00219 itsSVMId(&OPT_ITSVMId, this), 00220 itsSVMClass(&OPT_ITSVMClass, this), 00221 itsSVMModel(&OPT_ITSVMModel, this), 00222 itsSVMRange(&OPT_ITSVMRange, this), 00223 itsNameObj(&OPT_ITSObjNameSVM, this), /// 00224 itsRecoSave(&OPT_ITSFileRecoSave, this), 00225 itsTable(&OPT_ITSTable, this), // 00226 itsObjectDB(new VisualObjectDB()), 00227 itsnewObjectDB(new VisualObjectDB()), 00228 itsPDFGist(new std::map<double, int>()), 00229 itsVisualCortex(new RawVisualCortex(mgr)), 00230 itsGistEstim(new RawGistEstimatorStd(mgr))//, 00231 //itsClassifier(new SVMClassifierModule(mgr, "", "")) 00232 { 00233 addSubComponent(itsVisualCortex);// otherwise there is only one raw visual cortex and so add the new channel to general and only one ! 00234 LINFO("------------------------------before Set Model Param Val--------------------------"); 00235 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00236 //itsVisualCortex->setModelParamString("RawVisualCortexChans", "ICO"); //, MC_RECURSE); 00237 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00238 LINFO("------------------------------after Set Model Param Val--------------------------"); 00239 addSubComponent(itsGistEstim); 00240 // addSubComponent(itsClassifier); 00241 } 00242 00243 // ###################################################################### 00244 void InferoTemporalSIFT::start1() 00245 { 00246 if(itsSIFTStoredDatabase.getVal().compare("") == 0) 00247 { 00248 LFATAL("Must specify directory SIFT Database <dbname.vdb> using it-SIFT-database-dir"); 00249 } 00250 if (itsSIFTStoredDatabase.getVal().empty()) 00251 LINFO("Starting with empty visualObjectDB"); // already done above 00252 else 00253 { 00254 itsObjectDB->loadFrom(itsSIFTStoredDatabase.getVal()); 00255 if(itsPathMatch.getVal().compare(" ") == 0) 00256 { 00257 LINFO("No path specified to match the object aigainst the whole database will be used"); 00258 itsnewObjectDB = itsObjectDB; 00259 } 00260 } 00261 00262 //itsVisualCortex->stop(); 00263 LINFO("------------------------------before Set Model Param Val--------------------------"); 00264 itsVisualCortex->setModelParamString("RawVisualCortexChans", "ICO"); //, MC_RECURSE); 00265 itsVisualCortex->subComponent("orientation")->setModelParamString("NumOrientations", "4"); 00266 LINFO("------------------------------after Set Model Param Val--------------------------"); 00267 //itsVisualCortex->start(); 00268 LINFO("------------------------------after Set itsVisualCortexStart--------------------------"); 00269 LINFO("------------------------------before Infero temporal start1--------------------------"); 00270 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00271 InferoTemporal::start1(); 00272 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00273 LINFO("------------------------------after Infero temporal start1--------------------------"); 00274 00275 00276 //LINFO("------------------------------before Set Model Param Val--------------------------"); 00277 //itsVisualCortex->setModelParamString("RawVisualCortexChans", "ICO"); //, MC_RECURSE); 00278 //LINFO("------------------------------after Set Model Param Val--------------------------"); 00279 } 00280 00281 // ###################################################################### 00282 void InferoTemporalSIFT::stop1() 00283 { 00284 // save database if we have a filename for it: 00285 if (itsSIFTStoredDatabase.getVal().empty() == false) 00286 itsObjectDB->saveTo(itsSIFTStoredDatabase.getVal()); 00287 } 00288 00289 // ###################################################################### 00290 InferoTemporalSIFT::~InferoTemporalSIFT() 00291 { 00292 } 00293 00294 // ###################################################################### 00295 void InferoTemporalSIFT::attentionShift(SimEventQueue& q, 00296 const Point2D<int>& location) 00297 00298 { 00299 Image<PixRGB<byte> > objImg; 00300 Image<PixRGB<byte> > objImgGist; 00301 00302 // get the lastest input frame from the retina: 00303 if (SeC<SimEventRetinaImage> e = q.check<SimEventRetinaImage>(this, SEQ_ANY)) 00304 { 00305 // SEQ_ANY is a flag passed to SimEventQueue for returning any event marked or not 00306 objImg = e->frame().colorByte(); 00307 objImgGist = objImg; 00308 } 00309 else 00310 LFATAL("Oooops, no input frame in the event queue?"); 00311 00312 // get the latest smooth mask from the shape estimator: 00313 Image<float> smoothMask; 00314 if (SeC<SimEventShapeEstimatorOutput> 00315 e = q.check<SimEventShapeEstimatorOutput>(this, SEQ_ANY,0)) 00316 { 00317 smoothMask = e->smoothMask(); 00318 } 00319 else 00320 LINFO("no mask available"); 00321 00322 // create visual object and extract keypoints: 00323 rutz::shared_ptr<VisualObject> vo; 00324 rutz::shared_ptr<TestImages::ObjData> objData; 00325 00326 //////////////////////////////////////////////////////////////////////////// 00327 //TRAIN MODE 00328 if (itsITCMode.getVal().compare("Train") == 0) 00329 { 00330 rutz::shared_ptr<TestImages::SceneData> sceneData; 00331 00332 //Get the scene data, but don't mark it so we will get it on the next saccade 00333 if (SeC<SimEventInputFrame> e = q.check<SimEventInputFrame>(this))//, SEQ_UNMARKED,0)) ////////////// 00334 { 00335 GenericFrame gf =e->frame(); 00336 00337 LINFO("--------------------------------------------------------------------------"); 00338 LINFO("----------------Received the frame----------------------------------------------------------"); 00339 LINFO("--------------------------------------------------------------------------"); 00340 00341 // XML INPUT 00342 if(gf.hasMetaData(std::string("SceneData"))) 00343 { 00344 rutz::shared_ptr<GenericFrame::MetaData> metaData; 00345 metaData = gf.getMetaData(std::string("SceneData")); 00346 if (metaData.get() != 0) 00347 { 00348 sceneData.dyn_cast_from(metaData); 00349 std::vector<TestImages::ObjData> objVect = sceneData->objects; 00350 for(uint obj=0; obj<objVect.size(); obj++) 00351 { 00352 objData.reset(new TestImages::ObjData(objVect[obj])); 00353 if (objData->polygon.size() > 0) 00354 { 00355 Point2D<int> upperLeft = objData->polygon[0]; 00356 Point2D<int> lowerRight = objData->polygon[0]; 00357 for(uint k=0; k<objData->polygon.size(); k++) 00358 { 00359 //find the bounds for the crop 00360 if (objData->polygon[k].i < upperLeft.i) upperLeft.i = objData->polygon[k].i; 00361 if (objData->polygon[k].j < upperLeft.j) upperLeft.j = objData->polygon[k].j; 00362 if (objData->polygon[k].i > lowerRight.i) lowerRight.i = objData->polygon[k].i; 00363 if (objData->polygon[k].j > lowerRight.j) lowerRight.j = objData->polygon[k].j; 00364 } 00365 // crop around object using polygon 00366 Image<PixRGB<byte> > objImgTr; 00367 objImgTr = crop(objImg, Rectangle::tlbrO(upperLeft.j,upperLeft.i,lowerRight.j,lowerRight.i)); 00368 00369 //GIST 00370 // Add the case for the gist on neighborhood 00371 Dims dimGist(512, 512); 00372 Rectangle r = Rectangle::tlbrO(upperLeft.j,upperLeft.i,lowerRight.j,lowerRight.i); 00373 00374 00375 if(itsCoarseReco.getVal()) 00376 { 00377 Point2D<int> rCenter = r.center(); 00378 Rectangle rGist = Rectangle::centerDims(rCenter, dimGist); 00379 Image<PixRGB<byte> > objImgTrGist; 00380 objImgTrGist = crop(objImg, rGist); 00381 computeGist(objImgTrGist); 00382 } 00383 00384 //SIFT 00385 vo.reset(new VisualObject("NewObject", "NewObjet", objImgTr)); 00386 vo->setName(objData->name); 00387 vo->setImageFname(itsPathMatch.getVal().c_str() + objData->name + ".png"); 00388 if (itsObjectDB->addObject(vo)) 00389 LINFO("Added VisualObject '%s' to database", vo->getName().c_str()); 00390 else 00391 LERROR("FAILED adding VisualObject '%s' to database -- IGNORING", 00392 vo->getName().c_str()); 00393 } 00394 } 00395 rutz::shared_ptr<SimEventITOutput> 00396 objDataEvent(new SimEventITOutput(this, objData)); 00397 q.post(objDataEvent); 00398 } 00399 } 00400 else //IMAGE INPUT WITHOUT XML 00401 { 00402 // crop around object using mask 00403 objData.reset(new TestImages::ObjData); 00404 objImg = getCroppedObject(objImg, smoothMask); 00405 if (!objImg.initialized()) 00406 { 00407 return; // no object image, so just do nothing 00408 } 00409 00410 //GIST 00411 if(itsCoarseReco.getVal()) 00412 { 00413 objImgGist = getCroppedObjectGist(objImgGist, smoothMask); 00414 computeGist(objImgGist); 00415 } 00416 00417 //SIFT 00418 std::string objName; 00419 if (itsNameObj.getVal().empty()) 00420 { 00421 LINFO("Enter name for new object:"); //or [RETURN] to skip trainning:"); 00422 std::getline(std::cin, objName, '\n'); 00423 } 00424 else 00425 objName = itsNameObj.getValString();/// 00426 00427 if (objName.length() >0) 00428 { 00429 vo.reset(new VisualObject("NewObject", "NewObjet", objImg)); 00430 LINFO("Train on %s", objName.c_str()); 00431 vo->setName(objName); 00432 //vo->setImageFname(objName + ".png"); 00433 vo->setImageFname(itsPathMatch.getVal().c_str() + objName + ".png"); //for placing the obj.png at the right place in the taxonomy 00434 //to draw the object considered 00435 const float threshold = 1.0f; 00436 const Rectangle r = findBoundingRect(smoothMask, threshold); 00437 std::vector<Point2D<int> > polyvect(5); 00438 polyvect[0] = r.topLeft(); 00439 polyvect[1] = r.topRight(); 00440 polyvect[2] = r.bottomRight(); 00441 polyvect[3] = r.bottomLeft(); 00442 polyvect[4] = r.topLeft(); 00443 objData->polygon = polyvect; //std::vector<Point2D<int> > 00444 objData->name =objName; 00445 objData->dims = objImg.getDims(); 00446 } 00447 if (itsObjectDB->addObject(vo)) 00448 LINFO("Added VisualObject '%s' to database", vo->getName().c_str()); 00449 else 00450 LERROR("FAILED adding VisualObject '%s' to database -- IGNORING", 00451 vo->getName().c_str()); 00452 00453 rutz::shared_ptr<SimEventITOutput> 00454 objDataEvent(new SimEventITOutput(this, objData)); 00455 q.post(objDataEvent); 00456 } 00457 } 00458 00459 std::ofstream tableN(itsTable.getValString().c_str(), std::ios::out | std::ios::app); 00460 if (tableN) 00461 { 00462 tableN << itsSVMClass.getValString() << " " << ":" << " " << itsSVMId.getValString() << " " << std::endl; 00463 } 00464 tableN.close(); 00465 //Add a test to add only neww entry in the table 00466 00467 00468 00469 }//END TRAINING MODE 00470 00471 //TESTING MODE 00472 else if(itsITCMode.getVal().compare("Test") == 0) 00473 { 00474 LINFO("--------------------------Enter testing Mode---------------------"); 00475 00476 // crop around object using mask to compute SIFT around WTA winner 00477 objImg = getCroppedObject(objImg, smoothMask); 00478 if (!objImg.initialized()) 00479 { 00480 return; // no object image, so just do nothing 00481 } 00482 00483 //GIST 00484 if(itsCoarseReco.getVal()) 00485 { 00486 objImgGist = getCroppedObjectGist(objImgGist, smoothMask); 00487 computeGist(objImgGist); 00488 bool testFind = 0; 00489 std::map<double, int>::reverse_iterator iteratorPDFGist = itsPDFGist->rbegin(); 00490 float tresholdSIFT = 1.50; //arbitrary 00491 std::string nameMatchSIFT; 00492 while (testFind == 0 && iteratorPDFGist != itsPDFGist->rend()) 00493 { 00494 gistSelect(iteratorPDFGist); 00495 00496 //SIFT 00497 objData.reset(new TestImages::ObjData);; 00498 vo.reset(new VisualObject("NewObject", "NewObjet", objImg)); 00499 //get the matching objects: 00500 std::vector< rutz::shared_ptr<VisualObjectMatch> > matches; 00501 const uint nmatches = itsnewObjectDB->getObjectMatchesParallel(vo, matches, VOMA_KDTREEBBF); 00502 //use kd-tree fast but approximate for matching 00503 //const uint nmatches = itsObjectDB->getObjectMatches(vo, matches, VOMA_KDTREEBBF); 00504 LINFO("the number of potemtial matches are = %d", nmatches); 00505 if (nmatches == 0U) 00506 { 00507 iteratorPDFGist ++; 00508 nameMatchSIFT = "No_Match"; 00509 } 00510 else 00511 { 00512 rutz::shared_ptr<VisualObjectMatch> vomOne = matches[0]; 00513 if (vomOne->getScore() > tresholdSIFT) 00514 { 00515 testFind=1; 00516 rutz::shared_ptr<VisualObject> objSIFT = vomOne->getVoTest(); 00517 nameMatchSIFT = objSIFT->getName(); 00518 LINFO("----------------------object recognized with score %f-----------------", vomOne->getScore()); 00519 } 00520 else 00521 { 00522 iteratorPDFGist ++; 00523 nameMatchSIFT = "No_Match"; 00524 LINFO("------------------the SIFT score is not sufficient---------------"); 00525 } 00526 } 00527 00528 if (testFind==1 || iteratorPDFGist==itsPDFGist->rend()); 00529 { 00530 //to draw the polygon 00531 const float threshold = 1.0f; 00532 const Rectangle r = findBoundingRect(smoothMask, threshold); 00533 std::vector<Point2D<int> > polyvect(5); 00534 polyvect[0] = r.topLeft(); 00535 polyvect[1] = r.topRight(); 00536 polyvect[2] = r.bottomRight(); 00537 polyvect[3] = r.bottomLeft(); 00538 polyvect[4] = r.topLeft(); 00539 objData->polygon = polyvect; //std::vector<Point2D<int> > 00540 00541 //if no match, forget it: 00542 if (nmatches ==0U) 00543 { 00544 objData->name = "No Match"; //string 00545 objData->maxProb = 0; 00546 objData->dims = objImg.getDims(); 00547 } 00548 else 00549 { 00550 //record the result of the 1rst match found 00551 rutz::shared_ptr<VisualObjectMatch> voms = matches[0]; 00552 rutz::shared_ptr<VisualObject> objs = voms->getVoTest(); //get the visual object 00553 objData->name = objs->getName(); //string 00554 objData->maxProb = voms->getScore(); //double 00555 objData->dims = objImg.getDims(); //dims 00556 } 00557 rutz::shared_ptr<SimEventITOutput> 00558 objDataEvent(new SimEventITOutput(this, objData)); 00559 q.post(objDataEvent); 00560 } 00561 } 00562 00563 std::ofstream recoOutputFile(itsRecoSave.getValString().c_str(), std::ios::out | std::ios::app); 00564 if(recoOutputFile) 00565 { 00566 recoOutputFile << nameMatchSIFT << std::endl; 00567 recoOutputFile.close(); 00568 } 00569 00570 } 00571 else 00572 { 00573 //SIFT without gist 00574 std::string nameMatchSIFT; 00575 objData.reset(new TestImages::ObjData);; 00576 vo.reset(new VisualObject("NewObject", "NewObjet", objImg)); 00577 //get the matching objects: 00578 std::vector< rutz::shared_ptr<VisualObjectMatch> > matches; 00579 const uint nmatches = itsnewObjectDB->getObjectMatchesParallel(vo, matches, VOMA_KDTREEBBF); 00580 //use kd-tree fast but approximate for matching 00581 //const uint nmatches = itsObjectDB->getObjectMatches(vo, matches, VOMA_KDTREEBBF); 00582 const float threshold = 1.0f; 00583 const Rectangle r = findBoundingRect(smoothMask, threshold); 00584 std::vector<Point2D<int> > polyvect(5); 00585 polyvect[0] = r.topLeft(); 00586 polyvect[1] = r.topRight(); 00587 polyvect[2] = r.bottomRight(); 00588 polyvect[3] = r.bottomLeft(); 00589 polyvect[4] = r.topLeft(); 00590 objData->polygon = polyvect; //std::vector<Point2D<int> > 00591 00592 LINFO("the number of potemtial matches are = %d", nmatches); 00593 if (nmatches == 0U) 00594 { 00595 nameMatchSIFT = "No_Match"; 00596 } 00597 else 00598 { 00599 rutz::shared_ptr<VisualObjectMatch> vomOne = matches[0]; 00600 rutz::shared_ptr<VisualObject> objSIFT = vomOne->getVoTest(); 00601 nameMatchSIFT = objSIFT->getName(); 00602 LINFO("----------------------object recognized with score %f-----------------", vomOne->getScore()); 00603 00604 //to draw the polygon 00605 const float threshold = 1.0f; 00606 const Rectangle r = findBoundingRect(smoothMask, threshold); 00607 std::vector<Point2D<int> > polyvect(5); 00608 polyvect[0] = r.topLeft(); 00609 polyvect[1] = r.topRight(); 00610 polyvect[2] = r.bottomRight(); 00611 polyvect[3] = r.bottomLeft(); 00612 polyvect[4] = r.topLeft(); 00613 objData->polygon = polyvect; //std::vector<Point2D<int> > 00614 } 00615 00616 //if no match, forget it: 00617 if (nmatches ==0U) 00618 { 00619 objData->name = "No Match"; //string 00620 objData->maxProb = 0; 00621 objData->dims = objImg.getDims(); 00622 } 00623 else 00624 { 00625 //record the result of the 1rst match found 00626 rutz::shared_ptr<VisualObjectMatch> voms = matches[0]; 00627 rutz::shared_ptr<VisualObject> objs = voms->getVoTest(); //get the visual object 00628 objData->name = objs->getName(); //string 00629 objData->maxProb = voms->getScore(); //double 00630 objData->dims = objImg.getDims(); //dims 00631 } 00632 rutz::shared_ptr<SimEventITOutput> 00633 objDataEvent2(new SimEventITOutput(this, objData)); 00634 q.post(objDataEvent2); 00635 std::ofstream recoOutputFile(itsRecoSave.getValString().c_str(), std::ios::out | std::ios::app); 00636 if(recoOutputFile) 00637 { 00638 recoOutputFile << nameMatchSIFT << std::endl; 00639 recoOutputFile.close(); 00640 } 00641 } 00642 } 00643 else 00644 LFATAL("Unknown IT Mode type %s", itsITCMode.getVal().c_str()); 00645 } 00646 00647 // ###################################################################### 00648 std::string InferoTemporalSIFT::getObjNameAtLoc(const std::vector<TestImages::ObjData> &objects, const Point2D<int>& loc) 00649 { 00650 //will be usefull for comparison with the ground truth given with a XML file given 00651 for(uint obj=0; obj<objects.size(); obj++) 00652 { 00653 TestImages::ObjData objData = objects[obj]; 00654 //uint nbobjtoreco = objects.size(); 00655 00656 //find the object dimention from the polygon 00657 if (objData.polygon.size() > 0) 00658 { 00659 Point2D<int> upperLeft = objData.polygon[0]; 00660 Point2D<int> lowerRight = objData.polygon[0]; 00661 for(uint i=0; i<objData.polygon.size(); i++) 00662 { 00663 //find the bounds for the crop 00664 if (objData.polygon[i].i < upperLeft.i) upperLeft.i = objData.polygon[i].i; 00665 if (objData.polygon[i].j < upperLeft.j) upperLeft.j = objData.polygon[i].j; 00666 if (objData.polygon[i].i > lowerRight.i) lowerRight.i = objData.polygon[i].i; 00667 if (objData.polygon[i].j > lowerRight.j) lowerRight.j = objData.polygon[i].j; 00668 } 00669 //check if point is within the polygon 00670 for(int y=upperLeft.j; y<lowerRight.j; y++) 00671 for(int x=upperLeft.i; x<lowerRight.i; x++) 00672 { 00673 if (pnpoly(objData.polygon, loc)) 00674 return objData.name; 00675 } 00676 } 00677 } 00678 return std::string("Unknown"); 00679 } 00680 00681 //vo.reset(new VisualObject("NewObject", "NewObjet", objImg)); 00682 00683 // ###################################################################### 00684 void InferoTemporalSIFT::getObjDBToMatch(const char *dir) 00685 { 00686 LINFO("filename is %s", dir); 00687 if (isDirectory(dir)) 00688 { 00689 LINFO("---------------------------------Directory not folder--------------------------"); 00690 DIR *dp = opendir(dir); 00691 dirent *dirp; 00692 while( (dirp = readdir(dp)) ) 00693 { 00694 if(dirp->d_name[0] != '.') 00695 { 00696 //std::string fil = dir; 00697 //fil.append(dirp->d_name); 00698 std::string fil = sformat("%s%s", dir, dirp->d_name); 00699 if (isDirectory(dirp)) 00700 { 00701 getObjDBToMatch(fil.c_str()); 00702 } 00703 else 00704 { 00705 std::string lobjName = fil.c_str(); 00706 LINFO("--------------------the path name loaded is %s----------------------", fil.c_str()); 00707 //the depth of the taxonomy should be 2 file/class/item 00708 //objname in the format taxonomy:class:item 00709 lobjName = lobjName.erase(lobjName.rfind("."), lobjName.size()); //remove .png 00710 std::string item = lobjName; 00711 item = item.erase(item.find("/"), item.rfind("/")+1); 00712 std::string auxname = lobjName; 00713 auxname = auxname.erase(auxname.rfind("/"), auxname.size()); 00714 std::string classobj = auxname; 00715 classobj = classobj.erase(classobj.find("/"), classobj.rfind("/")+1); 00716 auxname = auxname.erase(auxname.rfind("/"), auxname.size()); 00717 std::string taxoname = auxname; 00718 taxoname = taxoname.erase(taxoname.find("/"), taxoname.rfind("/")+1); 00719 //objName = objName.erase(objName.find("/"), objName.rfind("/")+1); 00720 00721 std::string objName = taxoname; 00722 objName.append(":"); 00723 objName.append(classobj); 00724 objName.append(":"); 00725 objName.append(item); 00726 //create a new visual object database with only the object that belong to the path 00727 //itsnewObjectDB->addObject(itsObjectDB->getObject(objName)); 00728 00729 rutz::shared_ptr<VisualObject> myvo; 00730 myvo = itsObjectDB->getObject(objName); 00731 //LINFO("--------------------------getobj 1 num %u------------------------", itsObjectDB->numObjects()); 00732 itsnewObjectDB->addObject(myvo); 00733 } 00734 } //check add all the object to the visualObjectDB 00735 } 00736 closedir(dp); 00737 } 00738 else 00739 { 00740 std::string filb = sformat("%s", dir); 00741 std::string objName = filb.c_str(); 00742 objName = objName.erase(objName.rfind("."), objName.size()); 00743 objName = objName.erase(objName.find("/"), objName.rfind("/")+1); 00744 //create a new visual object database with only the object that belong to the path 00745 itsnewObjectDB->addObject(itsObjectDB->getObject(objName)); 00746 } 00747 } 00748 00749 // ###################################################################### 00750 void InferoTemporalSIFT::computeGist(Image<PixRGB<byte> > objImg) 00751 { 00752 00753 LINFO("--------------------------Enter compute Gist ---------------------"); 00754 LINFO("------------------------------------------------------------------"); 00755 //itsVisualCortex->subComponent("orientation")->setModelParamVal("NumOrientations", 4); 00756 LINFO("-------------------------Before resetting itsVisualCortex----------------------------------------"); 00757 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00758 itsVisualCortex->reset(MC_RECURSE); 00759 LINFO("-------------------------After resetting itsVisualCortex----------------------------------------"); 00760 LINFO("------------------------------itsRaw VisualCortex chans = %s--------------------------", itsVisualCortex->getModelParamString("RawVisualCortexChans").c_str()); 00761 LINFO("------------------------------------------------------------------"); 00762 //itsVisualCortex->setModelParamVal("RawVisualCortexChans", "ICO", MC_RECURSE); 00763 00764 InputFrame imgIn = InputFrame::fromRgb(&objImg); 00765 //INFO("--------------------------S test Cropped obj 4 ---------------------"); 00766 //LINFO("--------------------------Size objImg H=%d W=%d ---------------------", objImg.getHeight(), objImg.getWidth()); 00767 00768 Image<float> vcomap = itsVisualCortex->getVCOutput(objImg); 00769 LINFO("Sophie:------------------------the Raw VC Output has for dim H=%d and W=%d----------------------", vcomap.getHeight(), vcomap.getWidth()); 00770 00771 // WORKS but comment to check the pyramide size 00772 rutz::shared_ptr<ChannelMaps> chMaps(new ChannelMaps(itsVisualCortex.get())); 00773 LINFO("------------get all the %u channel maps at the same time---------",chMaps->numSubchans()); 00774 00775 //debug 00776 for(uint i=0; i < chMaps->numSubchans(); i++) 00777 { 00778 //Grab the current channel 00779 rutz::shared_ptr<ChannelMaps> currChan = chMaps->subChanMaps(i); 00780 //Determine the name of the channel 00781 std::vector<std::string> chanNameVec; 00782 split(currChan->getMap().name(), ":", std::back_inserter(chanNameVec)); 00783 std::string chanName = chanNameVec[1]; 00784 LINFO("------------------------------------channel Map name is %s------------------------", chanName.c_str()); 00785 LINFO("------------------------------------channel Map has %u subchans ------------------------", currChan->numSubchans()); 00786 for(uint k=0; k < currChan->numSubchans(); k++) 00787 { 00788 rutz::shared_ptr<ChannelMaps> subsubchan = currChan->subChanMaps(k); 00789 //Determine the name of the channel 00790 std::vector<std::string> subchanNameVec; 00791 split(subsubchan->getMap().name(), ":", std::back_inserter(subchanNameVec)); 00792 std::string subchanName = subchanNameVec[1]; 00793 LINFO("------------------------------------sub channel Map name is %s------------------------", subchanName.c_str()); 00794 } 00795 00796 } 00797 00798 00799 00800 00801 00802 00803 // gist feature vector 00804 Image<float> gistVector; 00805 gistVector = itsGistEstim->compute(chMaps); 00806 int idOb; 00807 std::string objName; 00808 00809 LINFO("----------the gist has been computed-------------------------------"); 00810 00811 if(itsITCMode.getVal().compare("Train") == 0) 00812 { 00813 //to save the image crop used to compute the Gist 00814 /*std::string fileGist; 00815 fileGist = "/lab/sophie/SIFT_Gist_Darpa/results/Crop_Gist/"; 00816 fileGist.append(itsNameObj.getValString()); 00817 fileGist.append("_cropGist.png"); 00818 Raster::WriteRGB(objImg,fileGist); 00819 */ 00820 00821 std::string objId; 00822 idOb = atof(itsSVMId.getValString().c_str()); 00823 } 00824 else 00825 { 00826 idOb = 0; 00827 //objName = ""; 00828 } 00829 std::vector<float> vectGist(714); 00830 getVectorColumn(gistVector, &vectGist, 0); 00831 00832 00833 //Classifier 00834 SVMClassifier objClassifier; 00835 if(itsITCMode.getVal().compare("Train") == 0) 00836 { 00837 objClassifier.train(itsTrainSVM.getValString(), idOb, vectGist); 00838 } 00839 if(itsITCMode.getVal().compare("Test") == 0) 00840 { 00841 LINFO("------------------"); 00842 objClassifier.readModel(itsSVMModel.getValString()); 00843 objClassifier.readRange(itsSVMRange.getValString()); 00844 double labelP; 00845 double probaP; 00846 labelP = objClassifier.predict(vectGist, &probaP); 00847 00848 LINFO("----------------------------the label of the object is %f, with a probablility=%f--------------", labelP, probaP); 00849 00850 std::map<int, double> pdf = objClassifier.predictPDF(vectGist); 00851 std::vector<int> labelVal; 00852 for(std::map<int,double>::iterator pdfIt = pdf.begin(); pdfIt != pdf.end(); ++pdfIt) 00853 { 00854 labelVal.push_back(pdfIt->first); 00855 itsPDFGist->insert(std::make_pair(pdfIt->second, pdfIt->first)); 00856 //LINFO("-----------------pdfitsecond %f pdfitfirst %d -------------", pdfIt->second, pdfIt->first);// debug 00857 } 00858 //bool testmap2 = itsPDFGist->empty(); 00859 //LINFO("------------------------------map is empty %d --------------------------", testmap2); 00860 unsigned int nbLabel = labelVal.size(); 00861 LINFO("------------------------There is %d differnt label--------------------", nbLabel); 00862 std::ofstream recoOutputFile(itsRecoSave.getValString().c_str(), std::ios::out | std::ios::app); 00863 if(recoOutputFile) 00864 { 00865 recoOutputFile << itsSVMId.getValString() << " " << itsNameObj.getValString() << " " << labelP << " " ; 00866 00867 for(unsigned int it=0; it < labelVal.size(); ++it) 00868 { 00869 int itlabel = labelVal[it]; 00870 double valproba = pdf[itlabel]; 00871 recoOutputFile << itlabel << ":" << valproba << " "; 00872 //LINFO("-----------------the object %d is belonging to category %d with proba=%f-------------", idOb, itlabel, valproba); 00873 } 00874 recoOutputFile.close(); 00875 } 00876 00877 //test new map 00878 std::vector<int> Valbis; 00879 for (std::map<double, int>::iterator ite = itsPDFGist->begin(); ite != itsPDFGist->end(); ++ite) 00880 { 00881 Valbis.push_back(ite->first); 00882 // LINFO("-----------------the value %f is from the category %d ------------",ite->first, ite->second); 00883 } 00884 00885 } 00886 } 00887 00888 // ###################################################################### 00889 00890 void InferoTemporalSIFT::gistSelect(std::map<double, int>::reverse_iterator iteratorPDFGist) 00891 { 00892 std::map<int, std::string> tableIdClass; 00893 std::ifstream tableNa(itsTable.getValString().c_str(), std::ios::in); 00894 00895 if (tableNa) 00896 { 00897 while( !tableNa.eof() ) 00898 { 00899 int Id; 00900 std::string categ; 00901 std::string aux; 00902 // std::getline(std::cin, objName, '\n'); 00903 tableNa >> categ >> aux >> Id; 00904 tableIdClass.insert(std::make_pair(Id, categ)); 00905 // LINFO("---------------------------------categ=%s---------------------", categ.c_str()); 00906 //LINFO("---------------------------------id=%d---------------------", Id); 00907 } 00908 tableNa.close(); 00909 } 00910 00911 std::string filenamerecogist; 00912 00913 //loop on map to find the matching point 00914 //possibility to add trick as the map will be sort by Id 00915 bool resF = 0; 00916 00917 std::map<int,std::string>::iterator tabIt = tableIdClass.begin(); 00918 while (tabIt != tableIdClass.end() && resF == 0) 00919 { 00920 LINFO("------------------------gist said %d--------------------------", iteratorPDFGist->second); 00921 LINFO("------------------------comparing to %d %s--------------------------", tabIt->first, tabIt->second.c_str()); 00922 00923 if(iteratorPDFGist->second == tabIt->first) 00924 { 00925 resF = 1; 00926 filenamerecogist = itsPathMatch.getValString(); 00927 //filenamerecogist.append("/"); 00928 //FOR THE DEMO NOT REAL TAXONOMY!! 00929 //if (tabIt->second =="candy" || tabIt->second =="tea") 00930 // filenamerecogist = filenamerecogist.append("food/"); 00931 //else if (tabIt->second=="case" || tabIt->second=="lotion") 00932 // filenamerecogist = filenamerecogist.append("object/"); 00933 //else 00934 // filenamerecogist = filenamerecogist.append("document/"); 00935 00936 filenamerecogist.append(tabIt->second); //itsSVMClass.getValString()); 00937 filenamerecogist.append("/"); 00938 //LINFO("-------------------------------filenameRecogist-----------------------%s----------------------------", filenamerecogist.c_str()); 00939 } 00940 else if(iteratorPDFGist->second < tabIt->first) 00941 { 00942 LINFO("------------------------ERROR NO MATCH FOUND CHECK THE TABLE IS NOT CORRECT---------------------"); 00943 tabIt ++; 00944 } 00945 else 00946 { 00947 tabIt ++; 00948 LINFO("------just test the other class----"); 00949 } 00950 } 00951 00952 LINFO("--------------try to match with SIFT-------"); 00953 00954 getObjDBToMatch(filenamerecogist.c_str()); 00955 00956 00957 00958 00959 } 00960 00961 00962 // ###################################################################### 00963 /* So things look consistent in everyone's emacs... */ 00964 /* Local Variables: */ 00965 /* indent-tabs-mode: nil */ 00966 /* End: */ 00967