00001 /*!@file Neuro/SimulationViewerCompress.C multi-foveated saliency-based 00002 compression */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2003 // 00006 // by the University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. 
// 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/SimulationViewerCompress.C $ 00036 // $Id: SimulationViewerCompress.C 13343 2010-04-30 22:37:42Z lior $ 00037 // 00038 00039 #include "Neuro/SimulationViewerCompress.H" 00040 00041 #include "Channels/IntensityChannel.H" 00042 #include "Channels/ChannelOpts.H" // for OPT_LevelSpec 00043 #include "Channels/ChannelMaps.H" 00044 #include "Component/OptionManager.H" 00045 #include "Image/ColorOps.H" // for toRGB() etc. 00046 #include "Image/CutPaste.H" // for concatX(), inplacePaste() etc. 00047 #include "Image/DrawOps.H" // for drawDisk(), drawPatch() etc. 
#include "Image/FilterOps.H"    // for lowPass9()
#include "Image/MathOps.H"      // for binaryReverse(), thresholdedMix()
#include "Image/PyramidOps.H"   // for buildPyrGaussian(), weightedBlur()
#include "Image/ShapeOps.H"     // for rescale()
#include "Image/Transforms.H"   // for chamfer34()
#include "Neuro/AttentionGuidanceMap.H"
#include "Neuro/Brain.H"
#include "Neuro/NeuroOpts.H"
#include "Neuro/Retina.H"
#include "Neuro/SaccadeControllers.H"
#include "Neuro/SaccadeControllerConfigurator.H"
#include "Neuro/SaliencyMap.H"
#include "Neuro/ShapeEstimator.H"
#include "Neuro/SpatialMetrics.H"
#include "Neuro/TaskRelevanceMap.H"
#include "Neuro/VisualCortex.H"
#include "Simulation/SimulationOpts.H"
#include "Transport/FrameInfo.H"
#include "Transport/FrameOstream.H"
#include "Util/MathFunctions.H"
#include "Neuro/NeuroSimEvents.H"
#include "Media/MediaSimEvents.H"

#include <stdio.h>

// ######################################################################
// Constructor: registers simulation-event callbacks, exports all
// command-line options used by this viewer, and initializes internal
// state (frame counter starts at -1 so the first onSimEventRetinaImage
// brings it to 0, i.e., an I-frame).
SimulationViewerCompress::
SimulationViewerCompress(OptionManager& mgr,
                         const std::string& descrName,
                         const std::string& tagName) :
  SimulationViewer(mgr, descrName, tagName),
  SIMCALLBACK_INIT(SimEventRetinaImage),
  SIMCALLBACK_INIT(SimEventSaccadeStatusEye),
  SIMCALLBACK_INIT(SimEventSaveOutput),
  itsFOAradius(&OPT_FOAradius, this),
  itsNumFoveas(&OPT_SVCOMPnumFoveas, this),
  itsSaveTraj(&OPT_SVsaveTraj, this),
  itsSaveMegaCombo(&OPT_SVmegaCombo, this),
  itsSaveMask(&OPT_SVCOMPsaveMask, this),
  itsSaveFoveatedImage(&OPT_SVCOMPsaveFoveatedImage, this),
  itsDistanceFactor(&OPT_SVCOMPDistanceFactor, this),
  itsSaveEyeCombo(&OPT_SVCOMPsaveEyeCombo, this),
  itsDisplayPatch(&OPT_SVdisplayPatch, this),
  itsDisplayFOA(&OPT_SVdisplayFOA, this),
  itsDisplayEye(&OPT_SVCOMPdisplayHumanEye, this),
  itsColorNormal("SVcolorNormal", this, PixRGB<byte>(255, 255, 0)),
  itsColorEye("SVcolorHumanEye", this, PixRGB<byte>(128, 255, 255)),
  itsHeadRadius(&OPT_HeadMarkerRadius, this),
  itsMultiRetinaDepth(&OPT_SVCOMPMultiRetinaDepth, this),
  itsCacheSize(&OPT_SVCOMPcacheSize, this),
  itsUseTRMmax(&OPT_SVCOMPuseTRMmax, this),
  itsFoveaSCtype(&OPT_SVCOMPfoveaSCtype, this),
  itsOutFname(&OPT_SVEMoutFname, this),
  itsLevelSpec(&OPT_LevelSpec, this),
  itsNumRandomSamples(&OPT_SVEMnumRandomSamples, this),
  itsEyeCompare(&OPT_SVCOMPeyeCompare, this),
  itsIFramePeriod(&OPT_SVCOMPiframePeriod, this),
  itsMultiTraj(),
  itsSC(), itsInputTime(), itsFrame(-1), itsMask(),
  itsIgnoreSC(), itsCurrentMask(), itsOutFile(NULL), itsBlurMask(),
  itsEyeData()
{
  // IOR would interfere with the multi-fovea tracking scheme, so it is
  // forcibly disabled here:
  LINFO("NOTE: disabling IOR");
  getManager().setOptionValString(&OPT_IORtype, "None");

  // select an eyetrack EyeHeadController:
  if(itsEyeCompare.getVal())
    getManager().setOptionValString(&OPT_EyeHeadControllerType, "EyeTrack");
}

// ######################################################################
// Destructor: nothing to do; itsOutFile is closed in stop1() and all
// other members clean up via their own destructors.
SimulationViewerCompress::~SimulationViewerCompress()
{ }

// ######################################################################
// React to option changes: whenever the number of foveas or the
// per-fovea SaccadeController type changes, rebuild the array of
// SaccadeControllerEyeConfigurator subcomponents.
void SimulationViewerCompress::paramChanged(ModelParamBase* const param,
                                            const bool valueChanged,
                                            ParamClient::ChangeStatus* status)
{
  SimulationViewer::paramChanged(param, valueChanged, status);
  if (valueChanged && (param == &itsNumFoveas || param == &itsFoveaSCtype)) buildSCC();
}

// ######################################################################
// (Re)build our SaccadeControllerEyeConfigurator subcomponents, one per
// fovea, each with a unique tag name so their options do not collide.
void SimulationViewerCompress::buildSCC()
{
  // drop any old SCCs:
  removeAllSubComponents();

  LINFO("Using %d SaccadeControllers of type %s", itsNumFoveas.getVal(), itsFoveaSCtype.getVal().c_str());

  // build an array of SCCs and export their options:
  for (int i = 0; i < itsNumFoveas.getVal(); i ++)
    {
      nub::soft_ref<SaccadeControllerEyeConfigurator> scc(new SaccadeControllerEyeConfigurator(getManager()));
      // we need to change the tag name so that we won't get confused
      // among our various SCCs:
      char num[10]; sprintf(num, "%d", i);
      scc->setTagName(scc->tagName() + num);
      scc->setDescriptiveName(scc->descriptiveName() + " " + std::string(num));
      scc->exportOptions(MC_RECURSE);
      addSubComponent(scc);

      // let's change its SC type to not what the command-line says,
      // but what we say:
      scc->setModelParamString("SaccadeControllerEyeType", itsFoveaSCtype.getVal());
    }
}

// ######################################################################
// Simulation start: cache shortcuts to the configured SaccadeControllers,
// size the sliding-average mask cache, and open the eye-comparison output
// file if one was requested.
void SimulationViewerCompress::start1()
{
  itsSC.clear(); itsEyeData.clear();
  itsMask.setMaxSize(itsCacheSize.getVal());

  // setup shortcuts to our configured SCs:
  for (uint i = 0; i < numSubComp(); i ++) {
    nub::soft_ref<SaccadeControllerEyeConfigurator> scc = dynCast<SaccadeControllerEyeConfigurator>(subComponent(i));
    itsSC.push_back(scc->getSC());
    itsIgnoreSC.push_back(false);
  }

  // open output file if any:
  if (itsOutFname.getVal().empty() == false) {
    itsOutFile = fopen(itsOutFname.getVal().c_str(), "w");
    if (itsOutFile == NULL) PLFATAL("Cannot write '%s'", itsOutFname.getVal().c_str());
  }

  SimulationViewer::start1();
}

// ######################################################################
// Simulation stop: close the output file, if any.
// NOTE(review): start1() chains to SimulationViewer::start1() but stop1()
// does not chain to SimulationViewer::stop1() -- confirm this is intentional.
void SimulationViewerCompress::stop1()
{
  if (itsOutFile) { fclose(itsOutFile); itsOutFile = NULL; }
}

// ######################################################################
// New retina image: record input time/frame number (used for I-frame
// scheduling) and pre-build the Gaussian foveation pyramid that
// weightedBlur() will sample from in getTraj().
void SimulationViewerCompress::
onSimEventRetinaImage(SimEventQueue& q, rutz::shared_ptr<SimEventRetinaImage>& e)
{
  itsInputTime = q.now();       // keep track of time of last input
  ++ itsFrame;                  // keep track of frame number, to decide whether I-frame
  itsRawInputRectangle = e->rawInputRectangle();

  // get a foveation pyramid ready:
  itsMultiTraj = buildPyrGaussian(e->frame().colorByte(), 0, itsMultiRetinaDepth.getVal(), 9);
}

// ######################################################################
// Human eye-position sample received: just queue it up; all processing
// happens once per frame in getTraj().
void SimulationViewerCompress::
onSimEventSaccadeStatusEye(SimEventQueue& q, rutz::shared_ptr<SimEventSaccadeStatusEye>& e)
{
  // all the analysis will be done in getTraj(), so that we don't have
  // to recompute the whole blur mask at every eye movement
  // sample. Hence, here we just queue up the eye positions received:
  itsEyeData.push_back(e->position());
}

// ######################################################################
// Core per-frame computation: build the blur mask (SC-driven or purely
// saliency-driven), apply the space-variant blur to the input pyramid,
// draw the requested markings, optionally log eye-position/blur data,
// and return whichever composite image the save options call for.
Image< PixRGB<byte> > SimulationViewerCompress::getTraj(SimEventQueue& q)
{
  Dims dims = itsMultiTraj[0].getDims(); // input image dims

  // let's get the current normalized (values in 0..255) saliency map:
  Image<float> sm = getMap(q, true);

  // find the top itsNumFoveas salient locations; to this end, we will
  // find the max in sm, then use the FOAradius to draw a disk at the
  // top location, then loop as many times as we have foveas:
  Image<float> smf = rescaleOpt(sm, dims, itsDisplayInterp.getVal());

  // in this function, we merge two behaviors: if we have
  // subcomponents (i.e., SaccadeControllers), then we will work in a
  // mode where we have a bunch of foveas moving around. Otherwise, we
  // will use the saliency map as a continuous modulator of blur:
  Image<byte> msk;
  if (itsSC.size()) msk = getMaskSC(smf, q); else msk = getMaskSM(smf);

  // add this mask to our sliding average:
  if (itsCacheSize.getVal() > 0) // using a sliding average cache
    {
      itsMask.push_back(msk);

      // are we on an I-frame? If so, update our itsCurrentMask, and use
      // it. Otherwise, use the current contents of itsCurrentMask:
      if (itsFrame % itsIFramePeriod.getVal() == 0) itsCurrentMask = itsMask.mean();
    }
  else
    itsCurrentMask = msk; // just using instantaneous mask

  // update blur mask using TRM if needed, otherwise it's just itsCurrentMask:
  if (itsUseTRMmax.getVal() && itsBlurMask.initialized()) {
    // if a location is rapidly changing (high TRM value), we use the
    // value of itsCurrentMask for our blur; otherwise, we take the
    // min between itsCurrentMask and our accumulated itsBlurMask:
    Image<byte> minMask = takeMin(itsBlurMask, itsCurrentMask);
    if (SeC<SimEventTaskRelevanceMapOutput> e = q.check<SimEventTaskRelevanceMapOutput>(this, SEQ_ANY)) {
      Image<float> trm = rescaleOpt(e->trm(1.0F), dims, itsDisplayInterp.getVal());
      itsBlurMask = thresholdedMix(trm, 0.99F, minMask, itsCurrentMask);
    } else LFATAL("Cannot find a TRM!");
  } else itsBlurMask = itsCurrentMask;

  // we use the mean of our mask sliding average for a weighted blur:
  Image< PixRGB<byte> > traj = weightedBlur(itsBlurMask, itsMultiTraj); // weighted blur

  // draw a patch at center of each fovea?
  if (itsDisplayPatch.getVal())
    {
      // select a drawing color & size (larger patches for lower-index,
      // i.e., more salient, foveas):
      PixRGB<byte> col(itsColorNormal.getVal()); int psiz = 4 + 2*itsSC.size();

      // draw a patch at current position of each fovea:
      for (uint i = 0; i < itsSC.size(); i ++)
        if (itsIgnoreSC[i] == false) drawPatchBB(traj, itsSC[i]->getPreviousDecision(0).p, psiz-i*2, col);
    }

  // draw FOA outlines?
  if (itsDisplayFOA.getVal())
    {
      // select a drawing color & size:
      PixRGB<byte> col(itsColorNormal.getVal()); int thick = 3;

      Image<byte> om(itsCurrentMask);
      om = binaryReverse(om, byte(255));
      inplaceLowThresh(om, byte(220), byte(0)); // get the objects
      om = contour2D(om);                       // compute binary contour image
      int w = traj.getWidth(), h = traj.getHeight();
      Point2D<int> ppp;
      for (ppp.j = 0; ppp.j < h; ppp.j ++)
        for (ppp.i = 0; ppp.i < w; ppp.i ++)
          if (om.getVal(ppp.i, ppp.j))          // got a contour point -> draw here
            drawDisk(traj, ppp, thick, col);    // small disk for each point
    }

  // prepare a full-size color version of the SM for our various markings:
  Image< PixRGB<byte> > colorsm = toRGB(Image<byte>(rescaleOpt(sm, dims, itsDisplayInterp.getVal())));

  // get the raw, unfoveated input image and paste it into an image with our dims:
  Image< PixRGB<byte> > rawinp2;
  if (SeC<SimEventInputFrame> e = q.check<SimEventInputFrame>(this)) rawinp2 = e->frame().asRgb();
  Image< PixRGB<byte> > rawinp(dims, NO_INIT); rawinp.clear(PixRGB<byte>(64));
  Point2D<int> rawinpoff((rawinp.getWidth() - rawinp2.getWidth())/2, (rawinp.getHeight() - rawinp2.getHeight())/2);
  inplacePaste(rawinp, rawinp2, rawinpoff);

  // do we want to compare to human eye movement data?
  if (itsOutFile)
    {
      // compute average blur for this frame:
      byte mi, ma, avg; getMinMaxAvg(itsCurrentMask, mi, ma, avg);

      // get the raw SM:
      Image<float> rawsm;
      if (SeC<SimEventSaliencyMapOutput> e = q.check<SimEventSaliencyMapOutput>(this, SEQ_ANY))
        rawsm = e->sm(1.0F); else LFATAL("Cannot find a SM!");

      // get the map level to scale things down:
      int sml = itsLevelSpec.getVal().mapLevel();

      // let's get the raw saliency map and a vector of all our
      // conspicuity maps and of their min/max/avg:
      // NOTE(review): cmap/cmi/cma/cav are filled below but never read
      // afterwards in this function -- possibly leftover analysis code.
      std::vector< Image<float> > cmap;
      std::vector<float> cmi, cma, cav;

      // grab all the VisualCortex maps:
      rutz::shared_ptr<SimReqVCXmaps> vcxm(new SimReqVCXmaps(this));
      q.request(vcxm); // VisualCortex is now filling-in the maps...
      rutz::shared_ptr<ChannelMaps> chm = vcxm->channelmaps();

      // find out a window to use for our random values, which is
      // important in case we are doing input shifting with
      // --shift-input and apply a field of view with
      // --input-fov. In these cases, we want to take random samples
      // only withing the actual display area:
      Rectangle r = itsRawInputRectangle;
      //drawRect(colorsm, r, PixRGB<byte>(0, 255, 0), 2);
      //drawRect(traj, r, PixRGB<byte>(0, 255, 0), 2);

      // get a version of the rectangle scaled to SM dims:
      Rectangle rsm = Rectangle::tlbrI(r.top() >> sml, r.left() >> sml, r.bottomO() >> sml, r.rightO() >> sml);
      rsm = rsm.getOverlap(rawsm.getBounds());

      // let's get the raw saliency map and a vector of all our
      // conspicuity maps and of their min/max/avg:
      Image<float> cropsm = crop(rawsm, rsm);
      float rawsmmi, rawsmma, rawsmav;
      getMinMaxAvg(cropsm, rawsmmi, rawsmma, rawsmav);

      // now for all the top-level channel conspicuity maps:
      for (uint ii = 0; ii < chm->numSubchans(); ii ++) {
        Image<float> cm = chm->subChanMaps(ii)->getMap();
        if (cm.initialized() == false) cm.resize(rawsm.getDims(), true); // some channels may not have maps yet
        Image<float> cropmap = crop(cm, rsm);
        float ccmi, ccma, ccav;
        getMinMaxAvg(cropmap, ccmi, ccma, ccav);
        cmi.push_back(ccmi); cma.push_back(ccma); cav.push_back(ccav);
        cmap.push_back(cropmap);
      }

      // loop over all fixations that happened during current frame:
      while(itsEyeData.size()) {
        // pick a random location to get blur there:
        // NOTE(review): 'rnd' is computed but never used below; the
        // random samples actually written use 'randp' further down.
        Point2D<int> rnd(randomUpToNotIncluding(itsCurrentMask.getWidth()),
                         randomUpToNotIncluding(itsCurrentMask.getHeight()));

        // get next eye fixation:
        Point2D<int> eye = itsEyeData.front(); itsEyeData.pop_front();
        eye.clampToDims(itsCurrentMask.getDims());

        // also scale down eye coords to sm level:
        Point2D<int> eyesm(eye.i >> sml, eye.j >> sml);

        // finally shift eyesm to reflect our crops of the sm and cmaps:
        eyesm.i -= rsm.left(); eyesm.j -= rsm.top();
        eyesm.clampToDims(cropsm.getDims());

        // do we want to draw it?
        if (itsDisplayEye.getVal())
          {
            // select a drawing color & size:
            PixRGB<byte> col(itsColorEye.getVal()); int psiz = 5;
            drawPatchBB(traj, eye, psiz, col);
            drawPatchBB(colorsm, eye, psiz, col);

            // grab the latest retina so we can map the eye position
            // back into raw input coordinates:
            Point2D<int> rieye;
            if (SeC<SimEventRetinaImage> e = q.check<SimEventRetinaImage>(this, SEQ_ANY))
              rieye = e->retinalToRaw(eye);
            else LFATAL("ooops, no retina image in the queue?");
            rieye += rawinpoff;
            drawPatchBB(rawinp, rieye, psiz, col);
          }

        fprintf(itsOutFile, "%d %d %d %d %d %d",
                eye.i,                      // eye x position
                eye.j,                      // eye y position
                itsCurrentMask.getVal(eye), // blur val at eye
                mi,                         // min val of mask
                ma,                         // max val of mask
                avg);                       // average val of mask

        LINFO("eye pos and blur val at eye, mi, ma, avg:%d %d %d %d %d %d",
              eye.i, eye.j, itsCurrentMask.getVal(eye),mi,ma, avg);
        // append blur values at a number of random locations, for
        // chance-level comparison:
        for(int k=0; k<itsNumRandomSamples.getVal(); k++)
          {
            Point2D<int> randp(randomUpToNotIncluding(itsCurrentMask.getWidth()),
                               randomUpToNotIncluding(itsCurrentMask.getHeight()));
            fprintf(itsOutFile, " %d", itsCurrentMask.getVal(randp));
          }
        fprintf(itsOutFile, "\n");
      }
    }

  // do we want a mega combo instead of the plain blurred image?
  if (itsSaveMegaCombo.getVal())
    {
      Image< PixRGB<byte> > ret =
        concatX(colGreyCombo(itsMultiTraj[0], rescaleOpt(sm, dims, itsDisplayInterp.getVal()), false),
                colGreyCombo(traj, itsCurrentMask, false));
      drawGrid(ret, 2, 2, 2, PixRGB<byte>(128));
      return ret;
    }

  // do we want a mask only?
  if(itsSaveMask.getVal()) return itsCurrentMask;

  // do we want a foveated image only?
  if(itsSaveFoveatedImage.getVal()) return traj;

  // do we want an eye combo?
  if (itsSaveEyeCombo.getVal())
    {
      Image< PixRGB<byte> > ret = concatX(concatX(rawinp, traj), colorsm);
      // yellow separator lines between the three panels:
      drawLine(ret, Point2D<int>(dims.w()-1, 0), Point2D<int>(dims.w()-1, dims.h()-1), PixRGB<byte>(255,255,0), 3);
      drawLine(ret, Point2D<int>(dims.w()*2-1, 0), Point2D<int>(dims.w()*2-1,dims.h()-1), PixRGB<byte>(255,255,0), 3);

      // make sure the size is reasonable...
      while(ret.getWidth() > 1024) ret = decXY(lowPass3(ret));

      return ret;
    }

  // otherwise return the blurred image:
  return traj;
}

// ######################################################################
// Saliency-map-only mode: turn the (interpolated) saliency map into a
// blur mask directly -- low salience => high blur. Used when no
// SaccadeControllers are configured.
Image<byte> SimulationViewerCompress::getMaskSM(const Image<float>& smf)
{
  // let's start by smoothing the interpolated salmap a bit:
  Image<float> maskf = lowPass9(smf);

  // let's squash the SM a bit. We downplay values below average and
  // give more range to those above average (with possible saturation):
  float mi, ma, av; getMinMaxAvg(maskf, mi, ma, av);
  maskf = squash(maskf, mi, mi, 0.5F*(av-mi), 0.55F*(av-mi), ma, ma);

  // make a blurring mask:
  Image<byte> mask = binaryReverse(maskf, 255.0F); // will clamp to 0..255

  return mask;
}

// ######################################################################

namespace
{
  // Small helper pairing a salient location with its saliency value;
  // used by getMaskSC() to rank candidate fovea targets.
  struct Point2DS
  {
    Point2DS(const Point2D<int>& pp, double ss) : p(pp), sal(ss) {}

    Point2D<int> p; // location in saliency-map coordinates
    double sal;     // saliency at that location
  };
}

// ######################################################################
// Multi-fovea mode: extract the top salient locations, solve the
// correspondence problem between those locations and our
// SaccadeControllers (distance + feature similarity + ordering
// penalty), feed each SC its new percept, then build a distance-map
// blur mask around the tracked objects.
Image<byte> SimulationViewerCompress::getMaskSC(const Image<float>& smf,
                                                SimEventQueue& q)
{
  ///////nub::ref<VisualCortex> vc = itsBrain->getVC();
  ////FIXME///nub::ref<ShapeEstimator> se = itsBrain->getSE();

  // violently reset the se:
  ////FIXME///se->reset(MC_RECURSE);

  // find the top salient locations; to this end, we will find the max
  // in sm, then use the FOAradius to draw a disk at the top location,
  // then loop. We will extract more locations than we have foveas, so
  // that we are robust to slight changes in saliency ordering:
  std::vector<Point2DS> topsal; // will store saliency in the 'sal' field
  Image<float> smff = smf;      // get a copy we can modify
  for (uint i = 0; i < itsSC.size() + 4; i ++) {
    // find max:
    Point2D<int> p; float sal; findMax(smff, p, sal);

    // store coords & saliency:
    topsal.push_back(Point2DS(p, double(sal)));

    // get object shape at that location, or revert to a disk of no object:
    ////FIXME///se->compute(p);
    Image<byte> objmask; ////FIXME/// = se->getSmoothMask() * 255.0F;
    if (objmask.initialized() == false) {
      objmask.resize(smff.getDims(), true);
      drawDisk(objmask, p, itsFOAradius.getVal(), byte(255));
    }
    // inhibit the sm by the object shape, so the next iteration finds
    // the next-most-salient location:
    inplaceSetValMask(smff, objmask, 0.0F);
  }

  // if this is our first time (itsFeatures is empty), just assign an
  // SC to each of the top salient locations:
  if (itsFeatures.empty())
    for (uint i = 0; i < itsSC.size(); i ++)
      {
        // feed the SC:
        itsSC[i]->setPercept(WTAwinner(topsal[i].p, q.now(), topsal[i].sal, false), q);

        // keep track of the features each SC is tracking:
        rutz::shared_ptr<SimReqVCXfeatures> ef(new SimReqVCXfeatures(this, topsal[i].p));
        q.request(ef); // VisualCortex is now filling-in the features into ef->features()
        itsFeatures.push_back(ef->features());

        LINFO("Initializing SC[%d] to (%d,%d)", i, topsal[i].p.i, topsal[i].p.j);
      }
  else
    {
      // let's get an idea of which features are important for
      // differentiating between our current foveas, and of what
      // their range is:
      std::vector<double> minf, maxf; uint nf = itsFeatures[0].size();
      for (uint i = 0; i < nf; i ++) { minf.push_back(1.0e50); maxf.push_back(-1.0e50); }

      for (uint fov = 0; fov < itsSC.size(); fov ++)
        for (uint i = 0; i < nf; i ++)
          {
            if (itsFeatures[fov][i] < minf[i]) minf[i] = itsFeatures[fov][i];
            if (itsFeatures[fov][i] > maxf[i]) maxf[i] = itsFeatures[fov][i];
          }

      // solve correspondence problem: Score each salient location with
      // respect to each SC, based on distance and feature similarity in a
      // neighborhood:
      Image<float> score(topsal.size(), itsSC.size(), NO_INIT);
      for (uint i = 0; i < topsal.size(); i ++)
        for (uint j = 0; j < itsSC.size(); j ++)
          {
            // how well does salient location 'i' score with fovea
            // 'j'? First let's look at distance; what counts then is
            // the distance to the target (=percept) of the SC, not to
            // where the SC currently is:
            Point2D<int> pi = topsal[i].p;
            Point2D<int> pj = itsSC[j]->getPreviousPercept(0).p;
            float dist = pi.distance(pj);

            // a distance of up to twice our FOA radius yields no penalty:
            if (dist < 2.0F * float(itsFOAradius.getVal())) dist = 0.0F;

            // normalize the distance to 0..1 range:
            dist /= sqrt(smf.getWidth() * smf.getWidth() + smf.getHeight() * smf.getHeight());

            // now look at feature similarity; to this end, we explore
            // a neighborhood of the salient location and see whether
            // we can find the features that the SC is interested in:
            std::vector<float> scf = itsFeatures[j];
            double fdist = 1.0e50;
            // snap neighborhood center to even coords (we sample the
            // 21x21 neighborhood with stride 2):
            int ci = pi.i; if (ci & 1) ci --;
            int cj = pi.j; if (cj & 1) cj --;
            for (int jj = cj - 10; jj <= cj + 10; jj += 2)
              for (int ii = ci - 10; ii <= ci + 10; ii += 2)
                {
                  Point2D<int> p(ii, jj);
                  if (smf.coordsOk(p))
                    {
                      // get a vector of features:
                      rutz::shared_ptr<SimReqVCXfeatures> ef(new SimReqVCXfeatures(this, p));
                      q.request(ef); // VisualCortex is now filling-in the features into ef->features()
                      const std::vector<float>& ff = ef->features();

                      // compute feature distance: get feature
                      // difference and normalize by feature range
                      // if range not too small:
                      double d = 0.0; int numf = 0;
                      for (uint k = 0; k < nf; k ++)
                        if (maxf[k] > minf[k] + 1.0)
                          {
                            double dd = (ff[k]-scf[k]) / (maxf[k]-minf[k]);

                            // accumulate compound feature distance:
                            d += dd * dd; numf ++;
                          }

                      // compute normalized weighted feature distance:
                      if (numf) d = sqrt(d / double(numf));

                      // if distance better than what we had, update:
                      if (d < fdist) fdist = d;
                    }
                }

            // point distance and feature distance both contribute
            // to score. In addition, we add here a penalty for
            // changing the ordering of the SCs, and a bonus for
            // higher saliency relative to the top location:
            float sco = -
              float(dist * 100.0) -
              float(fdist * 0.5) -
              10.0F * fabs(float(i)-float(j)) +
              100.0F * topsal[i].sal / topsal[0].sal;

            score.setVal(i, j, sco);
            LINFO("[topsal(%d)(%d,%d), SC(%d)(%d,%d)]: dist=%f fdist=%f "
                  "score=%f", i, pi.i, pi.j, j, pj.i, pj.j, dist, fdist, sco);
          }

      // find the best score and assign the corresponding salient
      // location to the corresponding SC; then kill that SC in the
      // score map and loop until all SCs have been assigned:
      for (uint i = 0; i < itsSC.size(); i ++)
        {
          Point2D<int> best; float val; findMax(score, best, val);
          int bi = best.i; // salient location
          int bj = best.j; // saccade controller

          // set new percept to the winning SC:
          itsSC[bj]->setPercept(WTAwinner(topsal[bi].p, q.now(), topsal[bi].sal, false), q);
          // also assign new feature vector to that SC; even though
          // the best score may have been achieved for a neighbor of
          // the topsal location, we use as feature vector the one
          // from the topsal location:
          rutz::shared_ptr<SimReqVCXfeatures> ef(new SimReqVCXfeatures(this, topsal[bi].p));
          q.request(ef); // VisualCortex is now filling-in the features into ef->features()
          itsFeatures[bj] = ef->features();

          // done with this SC; let's make sure we will not pick it up again:
          for (int k = 0; k < score.getWidth(); k ++) score.setVal(k, bj, -1.0e30F);

          // also make sure we will not pick up that salient loc again:
          for (int k = 0; k < score.getHeight(); k ++) score.setVal(bi, k, -1.0e30F);

          LINFO("Assigned topsal[%d](%d,%d) to SC[%d](%d,%d)", bi, topsal[bi].p.i, topsal[bi].p.j, bj,
                itsSC[bj]->getPreviousPercept(1).p.i, itsSC[bj]->getPreviousPercept(1).p.j);
        }
    }

  // evolve our SCs:
  for (uint i = 0; i < itsSC.size(); ++i) itsSC[i]->evolve(q);

  // run getDecision() on our SaccadeControllers:
  for (uint i = 0; i < itsSC.size(); ++i) itsSC[i]->getDecision(q, false);

  // create a mask with the object shapes at each SC:
  ///FIXME///se->reset(MC_RECURSE); // violently reset the SE
  Image<float> maskf(smf.getDims(), ZEROS);
  for (uint i = 0; i < itsSC.size(); ++i)
    {
      // if we terminate this iteration early we will ignore this SC:
      itsIgnoreSC[i] = true;

      // get the current fixation for this SC:
      Point2DT p = itsSC[i]->getPreviousDecision(0);
      if (p.p.isValid() == false)
        { LINFO("Ignoring SC[%d] because coords (%d,%d) invalid", i, p.p.i, p.p.j); continue; }

      // if salience was very low (below 5% of the top salience),
      // don't bother using this SC:
      if (smf.getVal(itsSC[i]->getPreviousPercept(0).p) < topsal[0].sal*0.05)
        { LINFO("Ignoring SC[%d] because salience too low", i); continue; }

      // otherwise segment the object and mark it:
      ////FIXME///se->compute(p.p);
      Image<float> objf;////FIXME/// = se->getSmoothMask();
      if (objf.initialized()) maskf = takeMax(maskf, objf);
      else drawDisk(maskf, p.p, itsFOAradius.getVal(), 1.0F);

      // ok, we won't ignore this SC:
      itsIgnoreSC[i] = false;
    }

  // binarize the object mask:
  inplaceLowThresh(maskf, 0.5F, 0.0F);

  // create a distance map from the mask (this code similar to that in foveate()):
  float maxdist = std::max(smf.getWidth(), smf.getHeight()) * 2 * itsDistanceFactor.getVal();
  float scalefac = maxdist / 255.0f;
  maskf = chamfer34(maskf, maxdist) / scalefac;

  // if modulator does not contain any point at zero (inside object),
  // that means that the mask was empty, which is the case at the
  // beginning of a simulation. Set it to some intermediary value to
  // provide a uniform medium blur; otherwise, squash it:
  float mi, ma, av; getMinMaxAvg(maskf, mi, ma, av);
  if (mi > 0.0F) maskf /= 3.0F;
  else maskf = squash(maskf, mi, mi, 0.5F*(av-mi), 0.6F*(av-mi), ma, ma);

  // return byte version of the mask:
  Image<byte> mask = maskf; // will clamp as necessary
  return mask;
}

// ######################################################################
// Save-output hook: compute the current trajectory/combo image and, if
// any save option is enabled, write it out through the FrameOstream
// carried by the save event.
void SimulationViewerCompress::
onSimEventSaveOutput(SimEventQueue& q, rutz::shared_ptr<SimEventSaveOutput>& e)
{
  // update the trajectory:
  Image< PixRGB<byte> > res = getTraj(q);

  // save results?
  if (itsSaveTraj.getVal() || itsSaveMegaCombo.getVal() ||
      itsSaveEyeCombo.getVal() || itsSaveMask.getVal() || itsSaveFoveatedImage.getVal())
    {
      // get the OFS to save to, assuming sinfo is of type
      // SimModuleSaveInfo (will throw a fatal exception otherwise):
      nub::ref<FrameOstream> ofs = dynamic_cast<const SimModuleSaveInfo&>(e->sinfo()).ofs;

      ofs->writeRGB(res, "T", FrameInfo("SimulationViewerCompress trajectory", SRC_POS));
    }
  }

// ######################################################################
// Sample a map at a location: returns the max map value within a disk
// of the given radius centered at p.
float SimulationViewerCompress::getSample(const Image<float>& smap,
                                          const Point2D<int>& p,
                                          const int radius) const
{
  // ### disk version:
  Image<float> fov(smap.getDims(), ZEROS);
  drawDisk(fov, p, radius, 1.0F);
  fov *= smap; // mask smap by the disk
  float junk, salience;
  getMinMax(fov, junk, salience); // salience = max within the disk
  return salience;

  // ### point version (alternative, currently disabled):
  // return smap.getVal(p);
}

// ######################################################################
// Average of getSample() over n uniformly random locations; provides a
// chance-level baseline for the map values.
float SimulationViewerCompress::
getRandomSample(const Image<float>& smap, const int radius, const int n) const
{
  float rndval = 0.0f;
  for (int i = 0; i < n; i ++)
    {
      Point2D<int> rndsm(randomUpToNotIncluding(smap.getWidth()), randomUpToNotIncluding(smap.getHeight()));
      rndval += getSample(smap, rndsm, radius);
    }
  return rndval / n;
}

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */