test-mbari.C

Go to the documentation of this file.
00001 /*!@file MBARI/test-mbari.C test program to detect marine animals
00002  */
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2002   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Dirk Walther <walther@caltech.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/MBARI/test-mbari.C $
00035 // $Id: test-mbari.C 10982 2009-03-05 05:11:22Z itti $
00036 //
00037 
00038 #include "Channels/ChannelOpts.H"
00039 #include "Component/GlobalOpts.H"
00040 #include "Component/ModelManager.H"
00041 #include "Component/ModelOptionDef.H"
00042 #include "Image/ColorOps.H"
00043 #include "Image/FilterOps.H"    // for lowPass5y()
00044 #include "Image/ImageCache.H"
00045 #include "Image/Kernels.H"      // for twofiftyfives()
00046 #include "Image/MorphOps.H"     // for openImg(), closeImg()
00047 #include "Image/PyramidOps.H"
00048 #include "Image/Transforms.H"
00049 #include "MBARI/FOEestimator.H"
00050 #include "MBARI/MbariFrameSeries.H"
00051 #include "MBARI/MbariResultViewer.H"
00052 #include "MBARI/VisualEvent.H"
00053 #include "MBARI/mbariFunctions.H"
00054 #include "Media/FrameRange.H"
00055 #include "Media/MediaOpts.H"
00056 #include "Neuro/NeuroOpts.H"
00057 #include "Neuro/SimulationViewer.H"
00058 #include "Neuro/SpatialMetrics.H"
00059 #include "Neuro/StdBrain.H"
00060 #include "Simulation/SimEventQueueConfigurator.H"
00061 #include "Util/StringConversions.H"
00062 #include "Util/log.H"
00063 #include "Util/sformat.H"
00064 
00065 #include <algorithm>
00066 #include <cstdio>
00067 #include <deque>
00068 #include <iostream>
00069 #include <sstream>
00070 
00071 // Used by: InputMbariFrameSeries
00072 static const ModelOptionDef OPT_InputMbariFrameRange =
00073   { MODOPT_ARG(FrameRange), "InputMbariFrameRange", &MOC_MBARIRV, OPTEXP_MRV,
00074     "Input frame range and delay in ms",
00075     "mbari-input-frames", 'M', "<first>-<last>", "0-0@0.0" };
00076 
00077 namespace
00078 {
00079   struct MbariFrameRange
00080   {
00081     MbariFrameRange() : first(0), last(0) {}
00082 
00083     int first, last;
00084   };
00085 
00086   bool operator==(const MbariFrameRange& r1,
00087                   const MbariFrameRange& r2)
00088   {
00089     return (r1.first == r2.first && r1.last == r2.last);
00090   }
00091 
00092   std::string convertToString(const MbariFrameRange& val)
00093   {
00094     return sformat("%d-%d", val.first, val.last);
00095   }
00096 
00097   void convertFromString(const std::string& str, MbariFrameRange& val)
00098   {
00099     std::stringstream s; int first = -2, last = -2; char c;
00100     s<<str; s>>first>>c>>last;
00101     if (first == -2 || last == -2 || c != '-')
00102       conversion_error::raise<MbariFrameRange>(str);
00103 
00104     val.first = first; val.last = last;
00105   }
00106 }
00107 
00108 int main(const int argc, const char** argv)
00109 {
00110   // ######## Initialization of variables, reading of parameters etc.
00111   // a few constants
00112   const float maxEvolveTime = 0.5F;
00113   const uint maxNumSalSpots = 20;
00114   const uint minFrameNum = 5;
00115   const int minSizeRatio = 10000;
00116   const int maxDistRatio = 18;
00117   const int foaSizeRatio = 19;
00118   const int circleRadiusRatio = 40;
00119   const byte threshold = 5;
00120   const Image<byte> se = twofiftyfives(3);
00121   const int numFrameDist = 5;
00122 
00123   // initialize a few things
00124   ModelManager manager("MBARI test program");
00125 
00126   nub::soft_ref<SimEventQueueConfigurator>
00127     seqc(new SimEventQueueConfigurator(manager));
00128   manager.addSubComponent(seqc);
00129 
00130   nub::soft_ref<InputMbariFrameSeries> imfs(new InputMbariFrameSeries(manager));
00131   manager.addSubComponent(imfs);
00132 
00133   nub::soft_ref<OutputMbariFrameSeries> omfs(new OutputMbariFrameSeries(manager));
00134   manager.addSubComponent(omfs);
00135 
00136   nub::soft_ref<MbariResultViewer> rv(new MbariResultViewer(manager,omfs));
00137   manager.addSubComponent(rv);
00138 
00139   nub::soft_ref<StdBrain> brain(new StdBrain(manager));
00140   manager.addSubComponent(brain);
00141 
00142   nub::ref<SpatialMetrics> metrics(new SpatialMetrics(manager));
00143   manager.addSubComponent(metrics);
00144 
00145   // set up a frame range
00146   OModelParam<MbariFrameRange> frameRange
00147     (&OPT_InputMbariFrameRange, &manager);
00148 
00149   // set a bunch of paramters
00150   manager.setOptionValString(&OPT_OriInteraction,"SubtractMean");
00151   manager.setOptionValString(&OPT_OrientComputeType,"Steerable");
00152   manager.setOptionValString(&OPT_RawVisualCortexChans,"O:5IC");
00153   manager.setOptionValString(&OPT_UseRandom,"false");
00154   manager.setOptionValString(&OPT_ShapeEstimatorMode,"ConspicuityMap");
00155   manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod,"None");
00156   manager.setOptionValString(&OPT_IORtype,"ShapeEstCM");
00157   manager.setOptionValString(&OPT_SVdisplayFOA,"true");
00158   manager.setOptionValString(&OPT_SVdisplayPatch,"false");
00159   manager.setOptionValString(&OPT_SVdisplayFOALinks,"false");
00160   manager.setOptionValString(&OPT_SVdisplayAdditive,"true");
00161   manager.setOptionValString(&OPT_SVdisplayTime,"false");
00162   manager.setOptionValString(&OPT_SVdisplayBoring,"false");
00163 
00164   // parse the command line for the file names and set them
00165   if (!manager.parseCommandLine(argc, argv, "<input> <output>",1,2))
00166     return(1);
00167 
00168   // get the file names straight
00169   imfs->setFileStem(manager.getExtraArg(0));
00170   std::string outFileStem;
00171   if (manager.numExtraArgs() == 1)
00172     {
00173       outFileStem = "Res_";
00174       outFileStem.append(manager.getExtraArg(0));
00175       omfs->setFileStem(outFileStem);
00176     }
00177   else
00178     {
00179       outFileStem = manager.getExtraArg(1);
00180       omfs->setFileStem(outFileStem);
00181     }
00182 
00183 
00184   // get image dimensions and set a few paremeters that depend on it
00185   const Dims dims = imfs->peekDims(frameRange.getVal().first);
00186   const int minSize = dims.sz() / minSizeRatio;
00187   LINFO("minSize = %i",minSize);
00188   const int circleRadius = dims.w() / circleRadiusRatio;
00189   const int maxDist = dims.w() / maxDistRatio;
00190   LINFO("maxDist = %i",maxDist);
00191   const int foaSize = dims.w() / foaSizeRatio;
00192   metrics->setFOAradius(foaSize);
00193   nub::soft_ref<SimEventQueue> seq = seqc->getQ();
00194 
00195 
00196   // start all the ModelComponents
00197   manager.start();
00198 
00199   LINFO("after manager.start();");
00200 
00201   // initialize the visual event set
00202   VisualEventSet eventSet(maxDist, minFrameNum, minSize, manager.getExtraArg(0));
00203   int countFrameDist = 1;
00204 
00205   // are we loading the event structure from a file?
00206   const bool loadedEvents = rv->isLoadEventsNameSet();
00207 
00208   LINFO("before load events");
00209   if (loadedEvents) rv->loadVisualEventSet(eventSet);
00210   LINFO("after load events");
00211 
00212   PropertyVectorSet pvs;
00213   FOEestimator foeEst(20,0);
00214 
00215   // are we loading the set of property vectors from a file?
00216   const bool loadedProperties = rv->isLoadPropertiesNameSet();
00217   if (loadedProperties) rv->loadProperties(pvs);
00218 
00219   // initialize some more
00220   ImageCacheAvg< PixRGB<byte> > avgCache(rv->getAvgCacheSize());
00221   ImageCache< PixRGB<byte> > outCache(0);
00222   std::deque<int> outFrameNum;
00223   Image< PixRGB<byte> > img;
00224 
00225   // do we actually need to process the frames?
00226   if (rv->needFrames())
00227     {
00228       // pre-load and low-pass a few frames to get a valid average
00229       int currentFrame = frameRange.getVal().first;
00230       while(avgCache.size() < rv->getAvgCacheSize())
00231         {
00232           if (currentFrame > frameRange.getVal().last)
00233             {
00234               LERROR("Less input frames than necessary for sliding average - "
00235                      "using all the frames for caching.");
00236               break;
00237             }
00238           LINFO("Caching frame %06d.",currentFrame);
00239           img = lowPass5y(imfs->readRGB(currentFrame));
00240           avgCache.push_back(img);
00241           outCache.push_back(img);
00242           outFrameNum.push_back(currentFrame);
00243           ++currentFrame;
00244         }
00245     } // end if needFrames
00246 
00247   // ######## loop over frames ####################
00248   for (int curFrame = frameRange.getVal().first; curFrame <= frameRange.getVal().last; ++curFrame)
00249     {
00250       if (rv->needFrames())
00251         {
00252           // get image from cache or load and low-pass
00253           uint cacheFrameNum = curFrame - frameRange.getVal().first;
00254           if (cacheFrameNum < avgCache.size())
00255             {
00256               // we have cached this guy already
00257               LINFO("Processing frame %06d from cache.",curFrame);
00258               img = avgCache[cacheFrameNum];
00259             }
00260           else
00261             {
00262               // we need to load and low pass it and put it in the cache
00263               LINFO("Loading frame %06d.",curFrame);
00264               if (curFrame > frameRange.getVal().last)
00265                 {
00266                   LERROR("Premature end of frame sequence - bailing out.");
00267                   break;
00268                 }
00269               img = lowPass5y(imfs->readRGB(curFrame));
00270               avgCache.push_back(img);
00271               outCache.push_back(img);
00272               outFrameNum.push_back(curFrame);
00273               ++curFrame;
00274             }
00275 
00276           // subtract the running average from the image
00277           rv->output(img,curFrame,"LowPassed");
00278           img = avgCache.clampedDiffMean(img);
00279           rv->output(img,curFrame,"diffAvg");
00280 
00281         } // end if needFrames
00282 
00283 
00284       // all this we do not have to do if we load the event structure from a file
00285       if (!loadedEvents)
00286         {
00287 
00288           // create bw and binary versions of the img
00289           Image<byte> bwImg = maxRGB(img);
00290           rv->output(bwImg,curFrame,"BW");
00291 
00292           Image<byte> bitImg = makeBinary(bwImg, threshold);
00293           rv->output(bitImg,curFrame,"bin");
00294 
00295           Vector2D curFOE = foeEst.updateFOE(bitImg);
00296 
00297           /*
00298           std::cout << "Frame " << curFrame << ": FOE = ";
00299           if (curFOE.isValid())
00300             std::cout << curFOE.x() << " , " << curFOE.y() << "\n";
00301           else
00302             std::cout << "invalid\n";
00303           */
00304           if (curFOE.isValid()) std::cout << curFOE.x() << ' ' << curFOE.y() <<'\n';
00305           else std::cout << '\n';
00306 
00307           bitImg = closeImg(openImg(bitImg,se),se);
00308           rv->output(bitImg,curFrame,"Eroded");
00309 
00310           // update the events using the binary version
00311           eventSet.updateEvents(bitImg, curFOE, curFrame);
00312 
00313           // is counter at 0?
00314           --countFrameDist;
00315           if (countFrameDist == 0)
00316             {
00317               countFrameDist = numFrameDist;
00318 
00319               // get BitObjects at winning locations
00320               std::list<BitObject> sobjs = getSalRegions(brain, seq,
00321                                                          img, bitImg,
00322                                                          maxEvolveTime,
00323                                                          maxNumSalSpots,
00324                                                          minSize);
00325               //Rectangle region = img.getBounds();
00326               //std::list<BitObject> sobjs = getLargestObjects(bitImg,
00327               //                                             region,
00328               //                                             maxNumSalSpots,
00329               //                                             minSize);
00330 
00331               // display all the extracted objects
00332               rv->output(showAllObjects(sobjs),curFrame,"salient objects");
00333 
00334               // initiate events with these objects
00335               eventSet.initiateEvents(sobjs, curFrame);
00336             }
00337 
00338           // last frame? -> close everyone
00339           if (curFrame == frameRange.getVal().last) eventSet.closeAll();
00340 
00341           // weed out migit events (a.k.a too few frames)
00342           eventSet.cleanUp(curFrame);
00343         } // end if (!loadedEvents)
00344 
00345       // any closed events need flushing? -> flush out images
00346       int readyFrame;
00347       if ((curFrame == frameRange.getVal().last) || loadedEvents)
00348         readyFrame = curFrame;
00349       else
00350         //readyFrame = eventSet.getAllClosedFrameNum(curFrame);
00351         readyFrame = std::max(curFrame - int(minFrameNum), -1);
00352 
00353       // no frame ready -> go on
00354       if (readyFrame == -1) continue;
00355 
00356       // need to obtain the property vector set?
00357       if (!loadedProperties) pvs = eventSet.getPropertyVectorSet();
00358 
00359       // do this only when we actuall loaded frames
00360       if (rv->needFrames())
00361         {
00362           // see which frames are ready - output them and pop them off the cache
00363           while(outFrameNum.front() <= readyFrame)
00364             {
00365               rv->outputResultFrame(outCache.front(),outFileStem,
00366                                     outFrameNum.front(),
00367                                     eventSet,pvs,circleRadius);
00368 
00369               // need to save any event clips?
00370               uint csavenum = rv->numSaveEventClips();
00371               for (uint idx = 0; idx < csavenum; ++idx)
00372                 {
00373                   uint evnum = rv->getSaveEventClipNum(idx);
00374                   if (!eventSet.doesEventExist(evnum)) continue;
00375 
00376                   VisualEvent event = eventSet.getEventByNumber(evnum);
00377                   if (event.isFrameOk(outFrameNum.front()))
00378                     rv->saveSingleEventFrame(outCache.front(),
00379                                              outFrameNum.front(),event);
00380                 }
00381 
00382               outCache.pop_front();
00383               outFrameNum.pop_front();
00384               if (outFrameNum.empty()) break;
00385             }
00386         }
00387 
00388     } // end loop over all frames
00389 
00390   // write out eventSet?
00391   if (rv->isSaveEventsNameSet()) rv->saveVisualEventSet(eventSet);
00392 
00393   // write out property vector set?
00394   if (rv->isSavePropertiesNameSet()) rv->saveProperties(pvs);
00395 
00396   // write out positions?
00397   if (rv->isSavePositionsNameSet()) rv->savePositions(eventSet);
00398 
00399 } // end main
00400 
00401 
00402 // ######################################################################
00403 /* So things look consistent in everyone's emacs... */
00404 /* Local Variables: */
00405 /* indent-tabs-mode: nil */
00406 /* End: */