00001 /*!@file MBARI/test-mbari.C test program to detect marine animals 00002 */ 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2002 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Dirk Walther <walther@caltech.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/MBARI/test-mbari.C $ 00035 // $Id: test-mbari.C 10982 2009-03-05 05:11:22Z itti $ 00036 // 00037 00038 #include "Channels/ChannelOpts.H" 00039 #include "Component/GlobalOpts.H" 00040 #include "Component/ModelManager.H" 00041 #include "Component/ModelOptionDef.H" 00042 #include "Image/ColorOps.H" 00043 #include "Image/FilterOps.H" // for lowPass5y() 00044 #include "Image/ImageCache.H" 00045 #include "Image/Kernels.H" // for twofiftyfives() 00046 #include "Image/MorphOps.H" // for openImg(), closeImg() 00047 #include "Image/PyramidOps.H" 00048 #include "Image/Transforms.H" 00049 #include "MBARI/FOEestimator.H" 00050 #include "MBARI/MbariFrameSeries.H" 00051 #include "MBARI/MbariResultViewer.H" 00052 #include "MBARI/VisualEvent.H" 00053 #include "MBARI/mbariFunctions.H" 00054 #include "Media/FrameRange.H" 00055 #include "Media/MediaOpts.H" 00056 #include "Neuro/NeuroOpts.H" 00057 #include "Neuro/SimulationViewer.H" 00058 #include "Neuro/SpatialMetrics.H" 00059 #include "Neuro/StdBrain.H" 00060 #include "Simulation/SimEventQueueConfigurator.H" 00061 #include "Util/StringConversions.H" 00062 #include "Util/log.H" 00063 #include "Util/sformat.H" 00064 00065 #include <algorithm> 00066 #include <cstdio> 00067 #include <deque> 00068 #include <iostream> 00069 #include <sstream> 00070 00071 // Used by: InputMbariFrameSeries 00072 static const ModelOptionDef OPT_InputMbariFrameRange = 00073 { MODOPT_ARG(FrameRange), "InputMbariFrameRange", &MOC_MBARIRV, OPTEXP_MRV, 00074 "Input frame range and delay in ms", 00075 "mbari-input-frames", 'M', "<first>-<last>", "0-0@0.0" }; 00076 00077 namespace 00078 { 00079 struct MbariFrameRange 00080 { 00081 MbariFrameRange() : first(0), last(0) {} 00082 00083 int first, last; 00084 }; 00085 00086 bool operator==(const MbariFrameRange& r1, 00087 const MbariFrameRange& r2) 00088 { 00089 return (r1.first == r2.first && r1.last == r2.last); 00090 } 00091 00092 std::string convertToString(const MbariFrameRange& val) 00093 { 00094 return sformat("%d-%d", val.first, val.last); 00095 } 00096 00097 void convertFromString(const std::string& str, MbariFrameRange& val) 00098 { 00099 std::stringstream s; int first = -2, last = -2; char c; 00100 s<<str; s>>first>>c>>last; 00101 if (first == -2 || last == -2 || c != '-') 00102 conversion_error::raise<MbariFrameRange>(str); 00103 00104 val.first = first; val.last = last; 00105 } 00106 } 00107 00108 int main(const int argc, const char** argv) 00109 { 00110 // ######## Initialization of variables, reading of parameters etc. 00111 // a few constants 00112 const float maxEvolveTime = 0.5F; 00113 const uint maxNumSalSpots = 20; 00114 const uint minFrameNum = 5; 00115 const int minSizeRatio = 10000; 00116 const int maxDistRatio = 18; 00117 const int foaSizeRatio = 19; 00118 const int circleRadiusRatio = 40; 00119 const byte threshold = 5; 00120 const Image<byte> se = twofiftyfives(3); 00121 const int numFrameDist = 5; 00122 00123 // initialize a few things 00124 ModelManager manager("MBARI test program"); 00125 00126 nub::soft_ref<SimEventQueueConfigurator> 00127 seqc(new SimEventQueueConfigurator(manager)); 00128 manager.addSubComponent(seqc); 00129 00130 nub::soft_ref<InputMbariFrameSeries> imfs(new InputMbariFrameSeries(manager)); 00131 manager.addSubComponent(imfs); 00132 00133 nub::soft_ref<OutputMbariFrameSeries> omfs(new OutputMbariFrameSeries(manager)); 00134 manager.addSubComponent(omfs); 00135 00136 nub::soft_ref<MbariResultViewer> rv(new MbariResultViewer(manager,omfs)); 00137 manager.addSubComponent(rv); 00138 00139 nub::soft_ref<StdBrain> brain(new StdBrain(manager)); 00140 manager.addSubComponent(brain); 00141 00142 nub::ref<SpatialMetrics> metrics(new SpatialMetrics(manager)); 00143 manager.addSubComponent(metrics); 00144 00145 // set up a frame range 00146 OModelParam<MbariFrameRange> frameRange 00147 (&OPT_InputMbariFrameRange, &manager); 00148 00149 // set a bunch of paramters 00150 manager.setOptionValString(&OPT_OriInteraction,"SubtractMean"); 00151 manager.setOptionValString(&OPT_OrientComputeType,"Steerable"); 00152 manager.setOptionValString(&OPT_RawVisualCortexChans,"O:5IC"); 00153 manager.setOptionValString(&OPT_UseRandom,"false"); 00154 manager.setOptionValString(&OPT_ShapeEstimatorMode,"ConspicuityMap"); 00155 manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod,"None"); 00156 manager.setOptionValString(&OPT_IORtype,"ShapeEstCM"); 00157 manager.setOptionValString(&OPT_SVdisplayFOA,"true"); 00158 manager.setOptionValString(&OPT_SVdisplayPatch,"false"); 00159 manager.setOptionValString(&OPT_SVdisplayFOALinks,"false"); 00160 manager.setOptionValString(&OPT_SVdisplayAdditive,"true"); 00161 manager.setOptionValString(&OPT_SVdisplayTime,"false"); 00162 manager.setOptionValString(&OPT_SVdisplayBoring,"false"); 00163 00164 // parse the command line for the file names and set them 00165 if (!manager.parseCommandLine(argc, argv, "<input> <output>",1,2)) 00166 return(1); 00167 00168 // get the file names straight 00169 imfs->setFileStem(manager.getExtraArg(0)); 00170 std::string outFileStem; 00171 if (manager.numExtraArgs() == 1) 00172 { 00173 outFileStem = "Res_"; 00174 outFileStem.append(manager.getExtraArg(0)); 00175 omfs->setFileStem(outFileStem); 00176 } 00177 else 00178 { 00179 outFileStem = manager.getExtraArg(1); 00180 omfs->setFileStem(outFileStem); 00181 } 00182 00183 00184 // get image dimensions and set a few paremeters that depend on it 00185 const Dims dims = imfs->peekDims(frameRange.getVal().first); 00186 const int minSize = dims.sz() / minSizeRatio; 00187 LINFO("minSize = %i",minSize); 00188 const int circleRadius = dims.w() / circleRadiusRatio; 00189 const int maxDist = dims.w() / maxDistRatio; 00190 LINFO("maxDist = %i",maxDist); 00191 const int foaSize = dims.w() / foaSizeRatio; 00192 metrics->setFOAradius(foaSize); 00193 nub::soft_ref<SimEventQueue> seq = seqc->getQ(); 00194 00195 00196 // start all the ModelComponents 00197 manager.start(); 00198 00199 LINFO("after manager.start();"); 00200 00201 // initialize the visual event set 00202 VisualEventSet eventSet(maxDist, minFrameNum, minSize, manager.getExtraArg(0)); 00203 int countFrameDist = 1; 00204 00205 // are we loading the event structure from a file? 00206 const bool loadedEvents = rv->isLoadEventsNameSet(); 00207 00208 LINFO("before load events"); 00209 if (loadedEvents) rv->loadVisualEventSet(eventSet); 00210 LINFO("after load events"); 00211 00212 PropertyVectorSet pvs; 00213 FOEestimator foeEst(20,0); 00214 00215 // are we loading the set of property vectors from a file? 00216 const bool loadedProperties = rv->isLoadPropertiesNameSet(); 00217 if (loadedProperties) rv->loadProperties(pvs); 00218 00219 // initialize some more 00220 ImageCacheAvg< PixRGB<byte> > avgCache(rv->getAvgCacheSize()); 00221 ImageCache< PixRGB<byte> > outCache(0); 00222 std::deque<int> outFrameNum; 00223 Image< PixRGB<byte> > img; 00224 00225 // do we actually need to process the frames? 00226 if (rv->needFrames()) 00227 { 00228 // pre-load and low-pass a few frames to get a valid average 00229 int currentFrame = frameRange.getVal().first; 00230 while(avgCache.size() < rv->getAvgCacheSize()) 00231 { 00232 if (currentFrame > frameRange.getVal().last) 00233 { 00234 LERROR("Less input frames than necessary for sliding average - " 00235 "using all the frames for caching."); 00236 break; 00237 } 00238 LINFO("Caching frame %06d.",currentFrame); 00239 img = lowPass5y(imfs->readRGB(currentFrame)); 00240 avgCache.push_back(img); 00241 outCache.push_back(img); 00242 outFrameNum.push_back(currentFrame); 00243 ++currentFrame; 00244 } 00245 } // end if needFrames 00246 00247 // ######## loop over frames #################### 00248 for (int curFrame = frameRange.getVal().first; curFrame <= frameRange.getVal().last; ++curFrame) 00249 { 00250 if (rv->needFrames()) 00251 { 00252 // get image from cache or load and low-pass 00253 uint cacheFrameNum = curFrame - frameRange.getVal().first; 00254 if (cacheFrameNum < avgCache.size()) 00255 { 00256 // we have cached this guy already 00257 LINFO("Processing frame %06d from cache.",curFrame); 00258 img = avgCache[cacheFrameNum]; 00259 } 00260 else 00261 { 00262 // we need to load and low pass it and put it in the cache 00263 LINFO("Loading frame %06d.",curFrame); 00264 if (curFrame > frameRange.getVal().last) 00265 { 00266 LERROR("Premature end of frame sequence - bailing out."); 00267 break; 00268 } 00269 img = lowPass5y(imfs->readRGB(curFrame)); 00270 avgCache.push_back(img); 00271 outCache.push_back(img); 00272 outFrameNum.push_back(curFrame); 00273 ++curFrame; 00274 } 00275 00276 // subtract the running average from the image 00277 rv->output(img,curFrame,"LowPassed"); 00278 img = avgCache.clampedDiffMean(img); 00279 rv->output(img,curFrame,"diffAvg"); 00280 00281 } // end if needFrames 00282 00283 00284 // all this we do not have to do if we load the event structure from a file 00285 if (!loadedEvents) 00286 { 00287 00288 // create bw and binary versions of the img 00289 Image<byte> bwImg = maxRGB(img); 00290 rv->output(bwImg,curFrame,"BW"); 00291 00292 Image<byte> bitImg = makeBinary(bwImg, threshold); 00293 rv->output(bitImg,curFrame,"bin"); 00294 00295 Vector2D curFOE = foeEst.updateFOE(bitImg); 00296 00297 /* 00298 std::cout << "Frame " << curFrame << ": FOE = "; 00299 if (curFOE.isValid()) 00300 std::cout << curFOE.x() << " , " << curFOE.y() << "\n"; 00301 else 00302 std::cout << "invalid\n"; 00303 */ 00304 if (curFOE.isValid()) std::cout << curFOE.x() << ' ' << curFOE.y() <<'\n'; 00305 else std::cout << '\n'; 00306 00307 bitImg = closeImg(openImg(bitImg,se),se); 00308 rv->output(bitImg,curFrame,"Eroded"); 00309 00310 // update the events using the binary version 00311 eventSet.updateEvents(bitImg, curFOE, curFrame); 00312 00313 // is counter at 0? 00314 --countFrameDist; 00315 if (countFrameDist == 0) 00316 { 00317 countFrameDist = numFrameDist; 00318 00319 // get BitObjects at winning locations 00320 std::list<BitObject> sobjs = getSalRegions(brain, seq, 00321 img, bitImg, 00322 maxEvolveTime, 00323 maxNumSalSpots, 00324 minSize); 00325 //Rectangle region = img.getBounds(); 00326 //std::list<BitObject> sobjs = getLargestObjects(bitImg, 00327 // region, 00328 // maxNumSalSpots, 00329 // minSize); 00330 00331 // display all the extracted objects 00332 rv->output(showAllObjects(sobjs),curFrame,"salient objects"); 00333 00334 // initiate events with these objects 00335 eventSet.initiateEvents(sobjs, curFrame); 00336 } 00337 00338 // last frame? -> close everyone 00339 if (curFrame == frameRange.getVal().last) eventSet.closeAll(); 00340 00341 // weed out migit events (a.k.a too few frames) 00342 eventSet.cleanUp(curFrame); 00343 } // end if (!loadedEvents) 00344 00345 // any closed events need flushing? -> flush out images 00346 int readyFrame; 00347 if ((curFrame == frameRange.getVal().last) || loadedEvents) 00348 readyFrame = curFrame; 00349 else 00350 //readyFrame = eventSet.getAllClosedFrameNum(curFrame); 00351 readyFrame = std::max(curFrame - int(minFrameNum), -1); 00352 00353 // no frame ready -> go on 00354 if (readyFrame == -1) continue; 00355 00356 // need to obtain the property vector set? 00357 if (!loadedProperties) pvs = eventSet.getPropertyVectorSet(); 00358 00359 // do this only when we actuall loaded frames 00360 if (rv->needFrames()) 00361 { 00362 // see which frames are ready - output them and pop them off the cache 00363 while(outFrameNum.front() <= readyFrame) 00364 { 00365 rv->outputResultFrame(outCache.front(),outFileStem, 00366 outFrameNum.front(), 00367 eventSet,pvs,circleRadius); 00368 00369 // need to save any event clips? 00370 uint csavenum = rv->numSaveEventClips(); 00371 for (uint idx = 0; idx < csavenum; ++idx) 00372 { 00373 uint evnum = rv->getSaveEventClipNum(idx); 00374 if (!eventSet.doesEventExist(evnum)) continue; 00375 00376 VisualEvent event = eventSet.getEventByNumber(evnum); 00377 if (event.isFrameOk(outFrameNum.front())) 00378 rv->saveSingleEventFrame(outCache.front(), 00379 outFrameNum.front(),event); 00380 } 00381 00382 outCache.pop_front(); 00383 outFrameNum.pop_front(); 00384 if (outFrameNum.empty()) break; 00385 } 00386 } 00387 00388 } // end loop over all frames 00389 00390 // write out eventSet? 00391 if (rv->isSaveEventsNameSet()) rv->saveVisualEventSet(eventSet); 00392 00393 // write out property vector set? 00394 if (rv->isSavePropertiesNameSet()) rv->saveProperties(pvs); 00395 00396 // write out positions? 00397 if (rv->isSavePositionsNameSet()) rv->savePositions(eventSet); 00398 00399 } // end main 00400 00401 00402 // ###################################################################### 00403 /* So things look consistent in everyone's emacs... */ 00404 /* Local Variables: */ 00405 /* indent-tabs-mode: nil */ 00406 /* End: */