/*!@file TestSuite/test-ObjDet.C Test various object detection code */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Lior Elazary
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TestSuite/test-ObjDet.C $
// $Id: test-ObjDet.C 12962 2010-03-06 02:13:53Z irock $
//

#include "Component/GlobalOpts.H"
#include "Component/ModelManager.H"
#include "Component/ModelOptionDef.H"
#include "Component/ModelParam.H"
#include "Component/ModelParamBatch.H"
#include "GUI/XWindow.H"
#include "Image/Image.H"
#include "Image/ColorOps.H"
#include "Image/CutPaste.H"
#include "Image/ShapeOps.H"
#include "Image/Rectangle.H"
#include "Image/MathOps.H"
#include "Image/MatrixOps.H"
#include "Image/Transforms.H"
#include "Image/Convolutions.H"
#include "Media/FrameSeries.H"
#include "Media/TestImages.H"
#include "nub/ref.h"
#include "Raster/GenericFrame.H"
#include "Transport/FrameInfo.H"
#include "Raster/Raster.H"
#include "Util/Types.H"
#include "Util/log.H"
#include "Util/Timer.H"
#include "TestSuite/ObjDetBrain.h"
#include "GUI/DebugWin.H"

// Other libs so that we link against them
#include "Image/DrawOps.H"
#include "Image/FilterOps.H"
#include "Image/fancynorm.H"
#include "Neuro/EnvVisualCortex.H"
#include "Neuro/getSaliency.H"
#include "Util/MathFunctions.H"

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <unistd.h>
00084 #include <cstdlib> 00085 #include <cstdlib> 00086 #include <dlfcn.h> 00087 00088 static const ModelOptionDef OPT_ObjDetTrainingMode = 00089 { MODOPT_FLAG, "ObjDetTrainingMode", &MOC_GENERAL, OPTEXP_CORE, 00090 "Whether to traing the classifier or detect ", 00091 "training-mode", '\0', "", "false" }; 00092 00093 static const ModelOptionDef OPT_ObjDetFilterObject = 00094 { MODOPT_ARG_STRING, "ObjDetFilterObject", &MOC_GENERAL, OPTEXP_CORE, 00095 "Binary recognition. Is this object there or not. ", 00096 "filter-object", '\0', "<string>", "" }; 00097 00098 static const ModelOptionDef OPT_ObjDetOutputROCFile = 00099 { MODOPT_ARG_STRING, "ObjDetOutputROCFile", &MOC_GENERAL, OPTEXP_CORE, 00100 "The file name to output the ROC data to. ", 00101 "roc-file", '\0', "<string>", "" }; 00102 00103 static const ModelOptionDef OPT_ObjDetOutputTimingFile = 00104 { MODOPT_ARG_STRING, "ObjDetOutputTimingFile", &MOC_GENERAL, OPTEXP_CORE, 00105 "The file name to output timing information. ", 00106 "timing-file", '\0', "<string>", "" }; 00107 00108 static const ModelOptionDef OPT_ObjDetOutputResultsFile = 00109 { MODOPT_ARG_STRING, "ObjDetOutputResultsFile", &MOC_GENERAL, OPTEXP_CORE, 00110 "The file name to output full results information to. " 00111 "This will include the frame number, the scene filename, which object we " 00112 "had, what we labeled it and the confidence. 
Only for recognition mode.", 00113 "results-file", '\0', "<string>", "" }; 00114 00115 struct ResultData 00116 { 00117 int frame; 00118 std::string objName; 00119 std::string labelName; 00120 float confidence; 00121 00122 ResultData(int f, std::string& obj, std::string& label, float c) : 00123 frame(f), 00124 objName(obj), 00125 labelName(label), 00126 confidence(c) 00127 {} 00128 }; 00129 00130 bool ResultDataCmp(const ResultData& r1, const ResultData& r2) 00131 { 00132 return r1.confidence > r2.confidence; 00133 } 00134 00135 bool DetLocationCmp(const DetLocation& r1, const DetLocation& r2) 00136 { 00137 return r1.val > r2.val; 00138 } 00139 00140 int main(const int argc, const char **argv) 00141 { 00142 00143 MYLOGVERB = LOG_INFO; 00144 ModelManager manager("Test Object Det"); 00145 00146 OModelParam<bool> optTrainingMode(&OPT_ObjDetTrainingMode, &manager); 00147 OModelParam<std::string> optFilterObject(&OPT_ObjDetFilterObject, &manager); 00148 OModelParam<std::string> optOutputROCFile(&OPT_ObjDetOutputROCFile, &manager); 00149 OModelParam<std::string> optOutputTimingFile(&OPT_ObjDetOutputTimingFile, &manager); 00150 OModelParam<std::string> optOutputResultsFile(&OPT_ObjDetOutputResultsFile, &manager); 00151 00152 nub::ref<InputFrameSeries> ifs(new InputFrameSeries(manager)); 00153 manager.addSubComponent(ifs); 00154 00155 nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager)); 00156 manager.addSubComponent(ofs); 00157 00158 manager.exportOptions(MC_RECURSE); 00159 00160 00161 //Get all the args for this module up to -- 00162 int nModelArgs = 0; 00163 for(int i=0; i<argc; i++) 00164 { 00165 if (!strcmp(argv[i], "--")) 00166 break; 00167 nModelArgs++; 00168 } 00169 00170 00171 00172 if (manager.parseCommandLine( 00173 (const int)nModelArgs, (const char**)argv, "<ObjDetBrainLib>", 1, 1) == false) 00174 return 1; 00175 00176 std::string libFile = manager.getExtraArg(0); 00177 LDEBUG("Loading %s", libFile.c_str()); 00178 void* brainLib = 
dlopen(libFile.c_str(), RTLD_LAZY ); 00179 if (!brainLib) 00180 LFATAL("Can load library: %s (%s)", libFile.c_str(), dlerror()); 00181 00182 //Load the symbols 00183 dlerror(); //reset any errors 00184 CreateObjDetBrain* createBrain = (CreateObjDetBrain*) dlsym(brainLib, "createObjDetBrain"); 00185 DestoryObjDetBrain* destoryBrain = (DestoryObjDetBrain*) dlsym(brainLib, "destoryObjDetBrain"); 00186 00187 if (!createBrain || !destoryBrain) 00188 LFATAL("Can not find the create and destory symbols: %s", dlerror()); 00189 00190 int extraArgc = argc - nModelArgs; 00191 const char** extraArgv = &argv[nModelArgs]; 00192 00193 if (extraArgc == 0) 00194 { 00195 extraArgc = 1; 00196 extraArgv = argv; 00197 } 00198 00199 ObjDetBrain* brain = createBrain(extraArgc, extraArgv); 00200 00201 manager.start(); 00202 00203 ifs->startStream(); 00204 00205 Timer timer; 00206 FILE* timingFP = NULL; 00207 if (optOutputTimingFile.getVal().size() > 0) 00208 { 00209 timingFP = fopen(optOutputTimingFile.getVal().c_str(), "w"); 00210 if (timingFP == NULL) 00211 LFATAL("Can not open timing file: %s", 00212 optOutputTimingFile.getVal().c_str()); 00213 } 00214 00215 00216 timer.reset(); 00217 if (optTrainingMode.getVal()) 00218 brain->preTraining(); 00219 else 00220 brain->preDetection(); 00221 float preTime = timer.getSecs(); 00222 00223 if (timingFP) 00224 fprintf(timingFP, "%s %f\n", 00225 optTrainingMode.getVal() ? 
"PreTraining" : "PreDetection", 00226 preTime); 00227 00228 FILE* resultsFP = NULL; 00229 if (optOutputResultsFile.getVal().size() > 0 && !optTrainingMode.getVal()) 00230 { 00231 resultsFP = fopen(optOutputResultsFile.getVal().c_str(), "w"); 00232 if (resultsFP == NULL) 00233 LFATAL("Can not open results file: %s", 00234 optOutputResultsFile.getVal().c_str()); 00235 } 00236 std::vector<ResultData> results; 00237 00238 double totalTime = 0; 00239 unsigned long totalNumFrames = 0; 00240 //unsigned long long totalNumFixations = 0; 00241 double firstObjectMeanFixations = 0; 00242 double firstObjectStvarFixations = 0; 00243 double allObjectsMeanFixations = 0; 00244 double allObjectsStvarFixations = 0; 00245 00246 int numOfBins = 1000; 00247 std::vector<double> totalTruePositive(numOfBins, 0); 00248 std::vector<double> totalFalseNegative(numOfBins, 0); 00249 00250 while(1) 00251 { 00252 Image< PixRGB<byte> > inputImg; 00253 const FrameState is = ifs->updateNext(); 00254 if (is == FRAME_COMPLETE) 00255 break; 00256 00257 //grab the images 00258 GenericFrame input = ifs->readFrame(); 00259 if (!input.initialized()) 00260 break; 00261 inputImg = input.asRgb(); 00262 00263 totalNumFrames++; 00264 00265 //Get the metadata and find if we have the object name in the scene 00266 rutz::shared_ptr<GenericFrame::MetaData> 00267 metaData = input.getMetaData(std::string("SceneData")); 00268 if (metaData.get() != 0) { 00269 rutz::shared_ptr<TestImages::SceneData> sceneData; 00270 sceneData.dyn_cast_from(metaData); 00271 00272 ObjectData labeledObj; 00273 00274 if (optFilterObject.getVal().size() > 0) 00275 { 00276 labeledObj.name = "no_" + optFilterObject.getVal(); 00277 labeledObj.confidence = -1; 00278 00279 //Sech and see if we have this object in the scene 00280 for (uint i = 0; i < sceneData->objects.size(); i++) { 00281 TestImages::ObjData objData = sceneData->objects[i]; 00282 if (optFilterObject.getVal() == objData.name) 00283 labeledObj.name = objData.name; 00284 } 00285 } 
else { 00286 //Take the first object 00287 for (uint i = 0; i < sceneData->objects.size() && i<1; i++) { 00288 TestImages::ObjData objData = sceneData->objects[i]; 00289 labeledObj.name = objData.name; 00290 } 00291 } 00292 00293 double frameTime = -1; 00294 if (optTrainingMode.getVal()) 00295 { 00296 timer.reset(); 00297 brain->onTraining(inputImg, labeledObj); 00298 frameTime = timer.getSecs(); 00299 } else { 00300 timer.reset(); 00301 std::vector<DetLocation> smap = brain->onDetection(inputImg); 00302 frameTime = timer.getSecs(); 00303 00304 //Sort the results 00305 std::sort(smap.begin(), smap.end(), DetLocationCmp); 00306 00307 Image<float> objsMask(inputImg.getDims(), ZEROS); 00308 for (uint obj = 0; obj < sceneData->objects.size(); obj++) { 00309 TestImages::ObjData objData = sceneData->objects[obj]; 00310 drawFilledPolygon(objsMask, objData.polygon, (float)(obj+1)); 00311 } 00312 00313 unsigned long totalObjects = 0; 00314 unsigned long totalNonObjects = 0; 00315 for(uint i=0; i<objsMask.size(); i++) 00316 if (objsMask[i] > 0) 00317 totalObjects++; 00318 else 00319 totalNonObjects++; 00320 00321 //Check if this location has hit an object 00322 unsigned long numFixations = 0; 00323 //unsigned long numObjects = 0; 00324 00325 unsigned long fixationsToFirstObject = 0; 00326 unsigned long fixationsToAllObjects = 0; 00327 00328 std::vector<double> truePositive(numOfBins, 0); 00329 std::vector<double> falseNegative(numOfBins, 0); 00330 00331 if (smap.size() != objsMask.size()) 00332 LFATAL("Smap needs to be the same size as the image"); 00333 int binNum = 0; 00334 double fixPerBin = double(smap.size())/double(numOfBins); 00335 for (uint i=0; i<smap.size(); i++) 00336 { 00337 Point2D<int> loc(smap[i].i, smap[i].j); 00338 float objID = objsMask.getVal(loc); 00339 00340 numFixations++; 00341 if ((numFixations-1) > fixPerBin*(binNum+1)) 00342 { 00343 //New bin number 00344 int prevBinNum = binNum; 00345 binNum++; 00346 if(binNum >= numOfBins) 00347 LFATAL("binNum(%i) 
>= numOfBins(%i)", binNum, numOfBins); 00348 00349 truePositive[binNum] = truePositive[prevBinNum]; 00350 falseNegative[binNum] = falseNegative[prevBinNum]; 00351 } 00352 00353 if (objID > 0) 00354 truePositive[binNum]++; 00355 else 00356 falseNegative[binNum]++; 00357 } 00358 00359 //Aggragate values over frames 00360 for(int i=0; i<numOfBins; i++) 00361 { 00362 totalTruePositive[i] += (truePositive[i]/totalObjects); 00363 totalFalseNegative[i] += (falseNegative[i]/totalNonObjects); 00364 } 00365 00366 //for(uint i=0; i<smap.size(); i++) 00367 //{ 00368 // Point2D<int> loc(smap[i].i, smap[i].j); 00369 // float objID = objsMask.getVal(loc); 00370 // numFixations++; 00371 // totalNumFixations++; 00372 // if (objID > 0) 00373 // { 00374 // numObjects++; 00375 // //We got an object 00376 // if (fixationsToFirstObject == 0) 00377 // fixationsToFirstObject = numFixations; 00378 // if (fixationsToAllObjects == 0 && 00379 // numObjects == totalObjects) 00380 // fixationsToAllObjects = numFixations; 00381 // } 00382 //} 00383 //printf("%lu %lu %lu %lu\n", numFixations, smap.size(), numObjects, totalObjects); 00384 00385 //printf("%lu %lu %f %lu %lu %f\n", 00386 // fixationsToFirstObject, numFixations, 00387 // (double)fixationsToFirstObject/(double)numFixations, 00388 // fixationsToAllObjects, totalObjects, 00389 // (double)fixationsToAllObjects/(double)totalObjects); 00390 00391 //fflush(stdout); 00392 //calculate the mean and std of the fixations online 00393 00394 //Calc out of the total number of fixations, how many of these reached 00395 //the first object 00396 const double prevFirstMean = firstObjectMeanFixations; 00397 const double percFixationsToFirstObject = 00398 (double)fixationsToFirstObject/(double)numFixations; 00399 const double firstDelta = percFixationsToFirstObject - firstObjectMeanFixations; 00400 firstObjectMeanFixations += firstDelta/totalNumFrames; 00401 firstObjectStvarFixations += ( 00402 ( percFixationsToFirstObject - prevFirstMean)* 00403 
(percFixationsToFirstObject - firstObjectMeanFixations) 00404 ); 00405 00406 00407 //Calc out of the total number of fixations needed to reach all the objects 00408 // how many of these reached all the objects 00409 const double prevAllMean = allObjectsMeanFixations; 00410 const double percFixationsToAllObjects = 00411 (double)fixationsToAllObjects/(double)totalObjects; 00412 const double allDelta = percFixationsToAllObjects - allObjectsMeanFixations; 00413 allObjectsMeanFixations += allDelta/totalNumFrames; 00414 allObjectsStvarFixations += ( 00415 ( percFixationsToAllObjects - prevAllMean)* 00416 (percFixationsToAllObjects - allObjectsMeanFixations) 00417 ); 00418 } 00419 00420 if (timingFP) 00421 fprintf(timingFP, "%i %f\n", 00422 ifs->frame(), frameTime); 00423 totalTime += frameTime; 00424 00425 } 00426 ofs->writeRGB(inputImg, "input", FrameInfo("input", SRC_POS)); 00427 usleep(10000); 00428 } 00429 00430 00431 if (resultsFP) 00432 fclose(resultsFP); 00433 00434 timer.reset(); 00435 if (optTrainingMode.getVal()) 00436 brain->postTraining(); 00437 else 00438 brain->postDetection(); 00439 float postTime = timer.getSecs(); 00440 if (timingFP) 00441 fprintf(timingFP, "%s %f\n", 00442 optTrainingMode.getVal() ? 
"PostTraining" : "PostDetection", 00443 postTime); 00444 00445 if (timingFP) 00446 fclose(timingFP); 00447 00448 //Calculate ROC curve and AP 00449 if (!optTrainingMode.getVal()) 00450 { 00451 00452 //Normalize by number of frames 00453 for(int i=0; i<numOfBins; i++) 00454 { 00455 totalFalseNegative[i] /= totalNumFrames; 00456 totalTruePositive[i] /= totalNumFrames; 00457 } 00458 00459 ////Output the roc curve 00460 FILE* rocFP = NULL; 00461 if (optOutputROCFile.getVal().size() > 0) 00462 { 00463 rocFP = fopen(optOutputROCFile.getVal().c_str(), "w"); 00464 if (rocFP == NULL) 00465 LFATAL("Can not open roc file: %s", 00466 optOutputROCFile.getVal().c_str()); 00467 } 00468 if (rocFP) 00469 { 00470 for(int i=0; i<numOfBins; i++) 00471 fprintf(rocFP, "%f %f\n", totalFalseNegative[i], totalTruePositive[i]); 00472 fclose(rocFP); 00473 } 00474 00475 //Calculate the average true pos 00476 double ap=0; 00477 double step = 0.1; 00478 for(double t=0; t<=1; t+=step) 00479 { 00480 double maxPrec = 0; 00481 for(int i=0; i<numOfBins; i++) 00482 { 00483 if (totalFalseNegative[i] >= t) 00484 if (totalTruePositive[i] > maxPrec) 00485 maxPrec = totalTruePositive[i]; 00486 } 00487 ap += (maxPrec / ((1/step)+1) ); //take the average 00488 } 00489 00490 printf("Stats: Frames:%lu FPS:%f Ap:%f\n", 00491 totalNumFrames, 00492 (double)totalNumFrames/totalTime, 00493 ap); 00494 fflush(stdout); 00495 } else { 00496 printf("Stats: Frames:%lu FPS:%f \n", 00497 totalNumFrames, (double)totalNumFrames/totalTime); 00498 } 00499 00500 00501 00502 destoryBrain(brain); 00503 00504 //unload the library 00505 dlclose(brainLib); 00506 00507 00508 return 0; 00509 } 00510 00511 00512 // ###################################################################### 00513 /* So things look consistent in everyone's emacs... */ 00514 /* Local Variables: */ 00515 /* indent-tabs-mode: nil */ 00516 /* End: */