test-ObjDet.C

00001 /*!@file TestSuite/test-ObjDet.C Test various object detection code */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TestSuite/test-ObjDet.C $
00035 // $Id: test-ObjDet.C 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 #include "Component/GlobalOpts.H"
00039 #include "Component/ModelManager.H"
00040 #include "Component/ModelOptionDef.H"
00041 #include "Component/ModelParam.H"
00042 #include "Component/ModelParamBatch.H"
00043 #include "GUI/XWindow.H"
00044 #include "Image/Image.H"
00045 #include "Image/ColorOps.H"
00046 #include "Image/CutPaste.H"
00047 #include "Image/ShapeOps.H"
00048 #include "Image/Rectangle.H"
00049 #include "Image/MathOps.H"
00050 #include "Image/MatrixOps.H"
00051 #include "Image/Transforms.H"
00052 #include "Image/Convolutions.H"
00053 #include "Media/FrameSeries.H"
00054 #include "Media/TestImages.H"
00055 #include "nub/ref.h"
00056 #include "Raster/GenericFrame.H"
00057 #include "Transport/FrameInfo.H"
00058 #include "Raster/Raster.H"
00059 #include "Util/Types.H"
00060 #include "Util/log.H"
00061 #include "Util/Timer.H"
00062 #include "TestSuite/ObjDetBrain.h"
00063 #include "GUI/DebugWin.H"
00064 
00065 
00066 
00067 //Other libs so that we link against them
00068 #include "Image/DrawOps.H"
00069 #include "Image/FilterOps.H"
00070 #include "Image/fancynorm.H"
00071 #include "Neuro/EnvVisualCortex.H"
00072 #include "Neuro/getSaliency.H"
00073 #include "nub/ref.h"
00074 #include "Util/MathFunctions.H"
00075 
00076 
00077 
00078 
00079 #include <fstream>
00080 #include <iostream>
00081 #include <iomanip>
00082 #include <string>
00083 #include <unistd.h>
00084 #include <cstdlib>
00085 #include <cstdlib>
00086 #include <dlfcn.h>
00087 
00088 static const ModelOptionDef OPT_ObjDetTrainingMode =
00089   { MODOPT_FLAG, "ObjDetTrainingMode", &MOC_GENERAL, OPTEXP_CORE,
00090     "Whether to traing the classifier or detect ",
00091     "training-mode", '\0', "", "false" };
00092 
00093 static const ModelOptionDef OPT_ObjDetFilterObject =
00094   { MODOPT_ARG_STRING, "ObjDetFilterObject", &MOC_GENERAL, OPTEXP_CORE,
00095     "Binary recognition. Is this object there or not. ",
00096     "filter-object", '\0', "<string>", "" };
00097 
00098 static const ModelOptionDef OPT_ObjDetOutputROCFile =
00099   { MODOPT_ARG_STRING, "ObjDetOutputROCFile", &MOC_GENERAL, OPTEXP_CORE,
00100     "The file name to output the ROC data to. ",
00101     "roc-file", '\0', "<string>", "" };
00102 
00103 static const ModelOptionDef OPT_ObjDetOutputTimingFile =
00104   { MODOPT_ARG_STRING, "ObjDetOutputTimingFile", &MOC_GENERAL, OPTEXP_CORE,
00105     "The file name to output timing information. ",
00106     "timing-file", '\0', "<string>", "" };
00107 
00108 static const ModelOptionDef OPT_ObjDetOutputResultsFile =
00109   { MODOPT_ARG_STRING, "ObjDetOutputResultsFile", &MOC_GENERAL, OPTEXP_CORE,
00110     "The file name to output full results information to. "
00111     "This will include the frame number, the scene filename, which object we "
00112     "had, what we labeled it and the confidence. Only for recognition mode.",
00113     "results-file", '\0', "<string>", "" };
00114 
//! One recognition result record
/*! Bundles a frame number with two names and a confidence value. Going by
    the help text of the results-file option, objName is presumably the
    object that was actually in the scene and labelName the label that was
    assigned to it -- confirm against the code that fills these records. */
struct ResultData
{
  int frame;              //!< frame number the result belongs to
  std::string objName;    //!< object name (copied from the constructor's obj)
  std::string labelName;  //!< label name (copied from the constructor's label)
  float confidence;       //!< confidence value; ResultDataCmp orders by this

  //! Construct a record; both strings are copied into the struct
  /*! The string parameters are taken by const reference so that const
      strings and temporaries can be passed as well as plain lvalues. */
  ResultData(int f, const std::string& obj, const std::string& label, float c) :
    frame(f),
    objName(obj),
    labelName(label),
    confidence(c)
  {}
};
00129 
00130 bool ResultDataCmp(const ResultData& r1, const ResultData& r2)
00131 {
00132   return r1.confidence > r2.confidence;
00133 }
00134 
00135 bool DetLocationCmp(const DetLocation& r1, const DetLocation& r2)
00136 {
00137   return r1.val > r2.val;
00138 }
00139 
00140 int main(const int argc, const char **argv)
00141 {
00142 
00143   MYLOGVERB = LOG_INFO;
00144   ModelManager manager("Test Object Det");
00145 
00146   OModelParam<bool> optTrainingMode(&OPT_ObjDetTrainingMode, &manager);
00147   OModelParam<std::string> optFilterObject(&OPT_ObjDetFilterObject, &manager);
00148   OModelParam<std::string> optOutputROCFile(&OPT_ObjDetOutputROCFile, &manager);
00149   OModelParam<std::string> optOutputTimingFile(&OPT_ObjDetOutputTimingFile, &manager);
00150   OModelParam<std::string> optOutputResultsFile(&OPT_ObjDetOutputResultsFile, &manager);
00151 
00152   nub::ref<InputFrameSeries> ifs(new InputFrameSeries(manager));
00153   manager.addSubComponent(ifs);
00154 
00155   nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager));
00156   manager.addSubComponent(ofs);
00157 
00158   manager.exportOptions(MC_RECURSE);
00159 
00160 
00161   //Get all the args for this module up to --
00162   int nModelArgs = 0;
00163   for(int i=0; i<argc; i++)
00164   {
00165     if (!strcmp(argv[i], "--"))
00166       break;
00167     nModelArgs++;
00168   }
00169 
00170 
00171 
00172   if (manager.parseCommandLine(
00173         (const int)nModelArgs, (const char**)argv, "<ObjDetBrainLib>", 1, 1) == false)
00174     return 1;
00175 
00176   std::string libFile = manager.getExtraArg(0);
00177   LDEBUG("Loading %s", libFile.c_str());
00178   void* brainLib = dlopen(libFile.c_str(), RTLD_LAZY );
00179   if (!brainLib)
00180     LFATAL("Can load library: %s (%s)", libFile.c_str(), dlerror());
00181 
00182   //Load the symbols
00183   dlerror(); //reset any errors
00184   CreateObjDetBrain* createBrain = (CreateObjDetBrain*) dlsym(brainLib, "createObjDetBrain");
00185   DestoryObjDetBrain* destoryBrain = (DestoryObjDetBrain*) dlsym(brainLib, "destoryObjDetBrain");
00186 
00187   if (!createBrain  || !destoryBrain)
00188     LFATAL("Can not find the create and destory symbols: %s", dlerror());
00189 
00190   int extraArgc = argc - nModelArgs;
00191   const char** extraArgv = &argv[nModelArgs];
00192 
00193   if (extraArgc == 0)
00194   {
00195     extraArgc = 1;
00196     extraArgv = argv;
00197   }
00198 
00199   ObjDetBrain* brain = createBrain(extraArgc, extraArgv);
00200 
00201   manager.start();
00202 
00203   ifs->startStream();
00204 
00205   Timer timer;
00206   FILE* timingFP = NULL;
00207   if (optOutputTimingFile.getVal().size() > 0)
00208   {
00209     timingFP = fopen(optOutputTimingFile.getVal().c_str(), "w");
00210     if (timingFP == NULL)
00211       LFATAL("Can not open timing file: %s",
00212           optOutputTimingFile.getVal().c_str());
00213   }
00214 
00215 
00216   timer.reset();
00217   if (optTrainingMode.getVal())
00218     brain->preTraining();
00219   else
00220     brain->preDetection();
00221   float preTime = timer.getSecs();
00222 
00223   if (timingFP)
00224     fprintf(timingFP, "%s %f\n",
00225         optTrainingMode.getVal() ? "PreTraining" : "PreDetection",
00226         preTime);
00227 
00228   FILE* resultsFP = NULL;
00229   if (optOutputResultsFile.getVal().size() > 0 && !optTrainingMode.getVal())
00230   {
00231     resultsFP = fopen(optOutputResultsFile.getVal().c_str(), "w");
00232     if (resultsFP == NULL)
00233       LFATAL("Can not open results file: %s",
00234           optOutputResultsFile.getVal().c_str());
00235   }
00236   std::vector<ResultData> results;
00237 
00238   double totalTime = 0;
00239   unsigned long totalNumFrames = 0;
00240   //unsigned long long totalNumFixations = 0;
00241   double firstObjectMeanFixations = 0;
00242   double firstObjectStvarFixations = 0;
00243   double allObjectsMeanFixations = 0;
00244   double allObjectsStvarFixations = 0;
00245 
00246   int numOfBins = 1000;
00247   std::vector<double>  totalTruePositive(numOfBins, 0);
00248   std::vector<double>  totalFalseNegative(numOfBins, 0);
00249 
00250   while(1)
00251   {
00252     Image< PixRGB<byte> > inputImg;
00253     const FrameState is = ifs->updateNext();
00254     if (is == FRAME_COMPLETE)
00255       break;
00256 
00257     //grab the images
00258     GenericFrame input = ifs->readFrame();
00259     if (!input.initialized())
00260       break;
00261     inputImg = input.asRgb();
00262 
00263     totalNumFrames++;
00264 
00265     //Get the metadata and find if we have the object name in the scene
00266     rutz::shared_ptr<GenericFrame::MetaData>
00267       metaData = input.getMetaData(std::string("SceneData"));
00268     if (metaData.get() != 0) {
00269       rutz::shared_ptr<TestImages::SceneData> sceneData;
00270       sceneData.dyn_cast_from(metaData);
00271 
00272       ObjectData labeledObj;
00273 
00274       if (optFilterObject.getVal().size() > 0)
00275       {
00276         labeledObj.name = "no_" + optFilterObject.getVal();
00277         labeledObj.confidence = -1;
00278 
00279         //Sech and see if we have this object in the scene
00280         for (uint i = 0; i < sceneData->objects.size(); i++) {
00281           TestImages::ObjData objData = sceneData->objects[i];
00282           if (optFilterObject.getVal() == objData.name)
00283             labeledObj.name = objData.name;
00284         }
00285       } else {
00286         //Take the first object
00287         for (uint i = 0; i < sceneData->objects.size() && i<1; i++) {
00288           TestImages::ObjData objData = sceneData->objects[i];
00289           labeledObj.name = objData.name;
00290         }
00291       }
00292 
00293       double frameTime = -1;
00294       if (optTrainingMode.getVal())
00295       {
00296         timer.reset();
00297         brain->onTraining(inputImg, labeledObj);
00298         frameTime = timer.getSecs();
00299       } else {
00300         timer.reset();
00301         std::vector<DetLocation> smap = brain->onDetection(inputImg);
00302         frameTime = timer.getSecs();
00303 
00304         //Sort the results
00305         std::sort(smap.begin(), smap.end(), DetLocationCmp);
00306 
00307         Image<float> objsMask(inputImg.getDims(), ZEROS);
00308         for (uint obj = 0; obj < sceneData->objects.size(); obj++) {
00309           TestImages::ObjData objData = sceneData->objects[obj];
00310           drawFilledPolygon(objsMask, objData.polygon, (float)(obj+1));
00311         }
00312 
00313         unsigned long totalObjects = 0;
00314         unsigned long totalNonObjects = 0;
00315         for(uint i=0; i<objsMask.size(); i++)
00316           if (objsMask[i] > 0)
00317             totalObjects++;
00318           else
00319             totalNonObjects++;
00320 
00321         //Check if this location has hit an object
00322         unsigned long numFixations = 0;
00323         //unsigned long numObjects = 0;
00324 
00325         unsigned long fixationsToFirstObject = 0;
00326         unsigned long fixationsToAllObjects = 0;
00327 
00328         std::vector<double>  truePositive(numOfBins, 0);
00329         std::vector<double>  falseNegative(numOfBins, 0);
00330 
00331         if (smap.size() != objsMask.size())
00332           LFATAL("Smap needs to be the same size as the image");
00333         int binNum = 0;
00334         double fixPerBin = double(smap.size())/double(numOfBins);
00335         for (uint i=0; i<smap.size(); i++)
00336         {
00337           Point2D<int> loc(smap[i].i, smap[i].j);
00338           float objID = objsMask.getVal(loc);
00339 
00340           numFixations++;
00341           if ((numFixations-1) > fixPerBin*(binNum+1))
00342           {
00343             //New bin number
00344             int prevBinNum = binNum;
00345             binNum++;
00346             if(binNum >= numOfBins)
00347               LFATAL("binNum(%i) >= numOfBins(%i)", binNum, numOfBins);
00348 
00349             truePositive[binNum] = truePositive[prevBinNum];
00350             falseNegative[binNum] = falseNegative[prevBinNum];
00351           }
00352 
00353           if (objID > 0)
00354             truePositive[binNum]++;
00355           else
00356             falseNegative[binNum]++;
00357         }
00358 
00359         //Aggragate values over frames
00360         for(int i=0; i<numOfBins; i++)
00361         {
00362           totalTruePositive[i] += (truePositive[i]/totalObjects);
00363           totalFalseNegative[i] += (falseNegative[i]/totalNonObjects);
00364         }
00365 
00366         //for(uint i=0; i<smap.size(); i++)
00367         //{
00368         //  Point2D<int> loc(smap[i].i, smap[i].j);
00369         //  float objID = objsMask.getVal(loc);
00370         //  numFixations++;
00371         //  totalNumFixations++;
00372         //  if (objID > 0)
00373         //  {
00374         //    numObjects++;
00375         //    //We got an object
00376         //    if (fixationsToFirstObject == 0)
00377         //      fixationsToFirstObject = numFixations;
00378         //    if (fixationsToAllObjects == 0 &&
00379         //        numObjects == totalObjects)
00380         //      fixationsToAllObjects = numFixations;
00381         //  }
00382         //}
00383         //printf("%lu %lu %lu %lu\n", numFixations, smap.size(), numObjects, totalObjects);
00384 
00385         //printf("%lu %lu %f %lu %lu %f\n",
00386         //    fixationsToFirstObject, numFixations,
00387         //    (double)fixationsToFirstObject/(double)numFixations,
00388         //    fixationsToAllObjects, totalObjects,
00389         //    (double)fixationsToAllObjects/(double)totalObjects);
00390 
00391         //fflush(stdout);
00392         //calculate the mean and std of the fixations online
00393 
00394         //Calc out of the total number of fixations, how many of these reached
00395         //the first object
00396         const double prevFirstMean = firstObjectMeanFixations;
00397         const double percFixationsToFirstObject =
00398           (double)fixationsToFirstObject/(double)numFixations;
00399         const double firstDelta = percFixationsToFirstObject - firstObjectMeanFixations;
00400         firstObjectMeanFixations += firstDelta/totalNumFrames;
00401         firstObjectStvarFixations += (
00402             ( percFixationsToFirstObject - prevFirstMean)*
00403             (percFixationsToFirstObject - firstObjectMeanFixations)
00404             );
00405 
00406 
00407         //Calc out of the total number of fixations needed to reach all the objects
00408         // how many of these reached all the objects
00409         const double prevAllMean = allObjectsMeanFixations;
00410         const double percFixationsToAllObjects =
00411           (double)fixationsToAllObjects/(double)totalObjects;
00412         const double allDelta = percFixationsToAllObjects - allObjectsMeanFixations;
00413         allObjectsMeanFixations += allDelta/totalNumFrames;
00414         allObjectsStvarFixations += (
00415             ( percFixationsToAllObjects - prevAllMean)*
00416             (percFixationsToAllObjects - allObjectsMeanFixations)
00417             );
00418       }
00419 
00420       if (timingFP)
00421         fprintf(timingFP, "%i %f\n",
00422             ifs->frame(), frameTime);
00423       totalTime += frameTime;
00424 
00425     }
00426     ofs->writeRGB(inputImg, "input", FrameInfo("input", SRC_POS));
00427     usleep(10000);
00428   }
00429 
00430 
00431   if (resultsFP)
00432     fclose(resultsFP);
00433 
00434   timer.reset();
00435   if (optTrainingMode.getVal())
00436     brain->postTraining();
00437   else
00438     brain->postDetection();
00439   float postTime = timer.getSecs();
00440   if (timingFP)
00441     fprintf(timingFP, "%s %f\n",
00442         optTrainingMode.getVal() ? "PostTraining" : "PostDetection",
00443         postTime);
00444 
00445   if (timingFP)
00446     fclose(timingFP);
00447 
00448   //Calculate ROC curve and AP
00449   if (!optTrainingMode.getVal())
00450   {
00451 
00452     //Normalize by number of frames
00453     for(int i=0; i<numOfBins; i++)
00454     {
00455       totalFalseNegative[i] /= totalNumFrames;
00456       totalTruePositive[i] /= totalNumFrames;
00457     }
00458 
00459     ////Output the roc curve
00460     FILE* rocFP = NULL;
00461     if (optOutputROCFile.getVal().size() > 0)
00462     {
00463       rocFP = fopen(optOutputROCFile.getVal().c_str(), "w");
00464       if (rocFP == NULL)
00465         LFATAL("Can not open roc file: %s",
00466             optOutputROCFile.getVal().c_str());
00467     }
00468     if (rocFP)
00469     {
00470       for(int i=0; i<numOfBins; i++)
00471         fprintf(rocFP, "%f %f\n", totalFalseNegative[i], totalTruePositive[i]);
00472       fclose(rocFP);
00473     }
00474 
00475     //Calculate the average true pos
00476     double ap=0;
00477     double step = 0.1;
00478     for(double t=0; t<=1; t+=step)
00479     {
00480       double maxPrec = 0;
00481       for(int i=0; i<numOfBins; i++)
00482       {
00483         if (totalFalseNegative[i] >= t)
00484           if (totalTruePositive[i] > maxPrec)
00485             maxPrec = totalTruePositive[i];
00486       }
00487       ap += (maxPrec / ((1/step)+1) ); //take the average
00488     }
00489 
00490     printf("Stats: Frames:%lu FPS:%f Ap:%f\n",
00491         totalNumFrames,
00492         (double)totalNumFrames/totalTime,
00493         ap);
00494     fflush(stdout);
00495   } else {
00496     printf("Stats: Frames:%lu FPS:%f \n",
00497         totalNumFrames, (double)totalNumFrames/totalTime);
00498   }
00499 
00500 
00501 
00502   destoryBrain(brain);
00503 
00504   //unload the library
00505   dlclose(brainLib);
00506 
00507 
00508   return 0;
00509 }
00510 
00511 
00512 // ######################################################################
00513 /* So things look consistent in everyone's emacs... */
00514 /* Local Variables: */
00515 /* indent-tabs-mode: nil */
00516 /* End: */
Generated on Sun May 8 08:42:22 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3