/*!@file Neuro/VisualBuffer.C A saliency buffer with internal dynamics */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Laurent Itti <itti@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/VisualBuffer.C $
// $Id: VisualBuffer.C 13065 2010-03-28 00:01:00Z itti $
//

#include "Neuro/VisualBuffer.H"

#include "Channels/ChannelOpts.H"
#include "Component/OptionManager.H"
#include "Image/DrawOps.H"
#include "Image/FilterOps.H"
#include "Image/MathOps.H"
#include "Image/ShapeOps.H"
#include "Image/Transforms.H"
#include "Image/fancynorm.H"
#include "Media/MediaOpts.H"
#include "Neuro/NeuroOpts.H"
#include "Neuro/Retina.H"
#include "Simulation/SimEventQueue.H"
#include "Util/log.H"

// ######################################################################
// VisualBuffer: abstract base; simply forwards construction to SimModule.
VisualBuffer::VisualBuffer(OptionManager& mgr,
                           const std::string& descrName,
                           const std::string& tagName) :
  SimModule(mgr, descrName, tagName)
{ }

// ######################################################################
VisualBuffer::~VisualBuffer()
{ }

// ######################################################################
// VisualBufferStub: inert implementation, selected when no buffer is
// desired (types "None" / "Stub" below).
VisualBufferStub::VisualBufferStub(OptionManager& mgr,
                                   const std::string& descrName,
                                   const std::string& tagName) :
  VisualBuffer(mgr, descrName, tagName)
{ }

// ######################################################################
VisualBufferStub::~VisualBufferStub()
{ }

// ######################################################################
// VisualBufferConfigurator: picks, at option-parsing time, which
// concrete VisualBuffer implementation to instantiate, driven by the
// OPT_VisualBufferType command-line option.
VisualBufferConfigurator::
VisualBufferConfigurator(OptionManager& mgr,
                         const std::string& descrName,
                         const std::string& tagName) :
  ModelComponent(mgr, descrName, tagName),
  itsVBtype(&OPT_VisualBufferType, this),
  itsVB(new VisualBufferStub(mgr)) // default to a stub until paramChanged() selects a type
{
  addSubComponent(itsVB);
}

// ######################################################################
VisualBufferConfigurator::~VisualBufferConfigurator()
{ }

// ######################################################################
// Return the currently-selected VisualBuffer instance.
nub::ref<VisualBuffer> VisualBufferConfigurator::getVB() const
{
  return itsVB;
}

// ######################################################################
// React to option changes; when the buffer-type option changes, swap
// the current VisualBuffer subcomponent for a freshly-built one of the
// requested type.
void VisualBufferConfigurator::paramChanged(ModelParamBase* const param,
                                            const bool valueChanged,
                                            ParamClient::ChangeStatus* status)
{
  ModelComponent::paramChanged(param, valueChanged, status);

  // was that a change of our baby's name?
  if (param == &itsVBtype) {
    // if we had one, let's unregister it (when we later reset() the
    // nub::ref, the current VisualBuffer will unexport its
    // command-line options):
    removeSubComponent(*itsVB);

    // instantiate a VB of the appropriate type:
    if (itsVBtype.getVal().compare("None") == 0 ||
        itsVBtype.getVal().compare("Stub") == 0) // no VB
      itsVB.reset(new VisualBufferStub(getManager()));
    else if (itsVBtype.getVal().compare("Std") == 0) // standard
      itsVB.reset(new VisualBufferStd(getManager()));
    else
      LFATAL("Unknown VB type %s", itsVBtype.getVal().c_str());

    // add our baby as a subcomponent of us so that it will become
    // linked to the manager through us (hopefully we are registered
    // with the manager), which in turn will allow it to export its
    // command-line options and get configured:
    addSubComponent(itsVB);

    // tell the controller to export its options:
    itsVB->exportOptions(MC_RECURSE);

    // some info message:
    LINFO("Selected VB of type %s", itsVBtype.getVal().c_str());
  }
}

// ######################################################################
// VisualBufferStd: the standard implementation. Registers simulation
// callbacks for retina images (to track the retinal offset) and for
// attention guidance map outputs (the main input path).
VisualBufferStd::VisualBufferStd(OptionManager& mgr,
                                 const std::string& descrName,
                                 const std::string& tagName) :
  VisualBuffer(mgr, descrName, tagName),
  SIMCALLBACK_INIT(SimEventAttentionGuidanceMapOutput),
  SIMCALLBACK_INIT(SimEventRetinaImage),
  itsFOAradius(&OPT_FOAradius, this),
  itsIgnoreBoring(&OPT_VBignoreBoring, this), // see Neuro/NeuroOpts.{H,C}
  itsObjectBased(&OPT_VBobjectBased, this), // see Neuro/NeuroOpts.{H,C}
  itsBufferDims(&OPT_VBdims, this), // see Neuro/NeuroOpts.{H,C}
  itsLevelSpec(&OPT_LevelSpec, this), // see Channels/ChannelOpts.{H,C}
  itsInputDims(&OPT_InputFrameDims, this), // see Media/MediaOpts.{H,C}
  itsTimePeriod(&OPT_VBtimePeriod, this), // see Neuro/NeuroOpts.{H,C}
  itsDecayFactor(&OPT_VBdecayFactor, this),
  itsNormType(&OPT_VBmaxNormType, this), // see Neuro/NeuroOpts.{H,C}
  itsBuffer(), itsSMdims(), itsSaliencyMask(), itsTime(),
  itsLastInteractTime(), itsRetinaOffset(0, 0)
{ }

// ######################################################################
// start1: allocate the buffer now if its dims were given on the
// command line; otherwise it will be lazily sized from the first
// saliency map received by input().
void VisualBufferStd::start1()
{
  // initialize our buffer if we have valid dims for it:
  Dims d = itsBufferDims.getVal();
  if (d.isNonEmpty())
    {
      itsBuffer.resize(d, true);
      LINFO("Using buffer dims of (%d, %d)", d.w(), d.h());
    }
  LINFO("Using internal maxnorm of type %s, decay %f",
        itsNormType.getValString().c_str(), itsDecayFactor.getVal());
}

// ######################################################################
VisualBufferStd::~VisualBufferStd()
{ }

// ######################################################################
// Remember the current retinal offset so that retinal <-> buffer
// coordinate conversions (retinalToBuffer/bufferToRetinal) stay correct.
void VisualBufferStd::
onSimEventRetinaImage(SimEventQueue& q, rutz::shared_ptr<SimEventRetinaImage>& e)
{
  // just keep track of our retina offset
  itsRetinaOffset = e->offset();
}

// ######################################################################
// Main input path: on each new attention guidance map, either transfer
// the whole map into the buffer (map-based mode), or transfer only at
// covert attention shifts (object-based mode); then evolve one step.
void VisualBufferStd::
onSimEventAttentionGuidanceMapOutput(SimEventQueue& q, rutz::shared_ptr<SimEventAttentionGuidanceMapOutput>& e)
{
  // grab the agm:
  Image<float> agm = e->agm();

  // if our buffer is not object based, pass it the current saliency
  // map now (the winner location does not matter), otherwise, do it
  // only on every WTA winner:
  if (isObjectBased() == false)
    input(WTAwinner(Point2D<int>(0, 0), q.now(), 0.0, false), agm, Image<byte>());
  else if (SeC<SimEventWTAwinner> e = q.check<SimEventWTAwinner>(this))
    {
      const WTAwinner& win = e->winner();

      // Any output from the ShapeEstimator?
      Image<byte> foaMask;
      if (SeC<SimEventShapeEstimatorOutput> e = q.check<SimEventShapeEstimatorOutput>(this))
        foaMask = Image<byte>(e->smoothMask() * 255.0F);

      // all right, bufferize that stuff!
      input(win, agm, foaMask);
    }

  // evolve our internals one time step:
  this->evolve(q);
}

// ######################################################################
// Transfer saliency information into the buffer.
// win: current WTA winner (retinal coords; location ignored in
//   map-based mode); sm: saliency map; objmask: optional segmented
//   object mask from the ShapeEstimator (may be uninitialized, in
//   which case a disk of FOA radius is used in object-based mode).
void VisualBufferStd::input(const WTAwinner& win, const Image<float>& sm,
                            const Image<byte>& objmask)
{
  // update our sm dims:
  itsSMdims = sm.getDims();

  // if our buffer size has not been set yet, do it now, and use the
  // size of the sm as our buffer size:
  if (itsBuffer.initialized() == false) {
    itsBuffer.resize(itsSMdims, true);
    LINFO("Using buffer dims of (%d, %d)", itsSMdims.w(), itsSMdims.h());
  }

  // ignore boring attention shifts:
  if (win.boring && itsIgnoreBoring.getVal())
    { LINFO("Ignoring boring attention shift"); return; }

  // Let's build a multiplicative mask that we will apply to the sm
  // and that will determine what saliency information gets
  // transferred into the buffer. We have two modes here, either
  // object-based (only transfer attended object) or map-based
  // (transfer whole map). In addition, we have two sub-cases
  // depending on whether objmask is initialized (then use it to
  // define the object) or not (then use a disk).

  Image<float> maskedsm; // the sm masked by our transfer mask
  Image<float> mask;     // the saliency transfer mask, range [0..1]

  if (itsObjectBased.getVal()) {
    // Object-based model. We start by building a float mask at retinal
    // resolution and coordinates:
    Image<byte> maskb;

    if (objmask.initialized()) {
      // build our mask using the object definition passed to us:
      maskb = objmask; // get the (fuzzy-boundary) object shape
      inplaceLowThresh(maskb, byte(255)); // get tight object boundaries
    } else {
      // build our mask using a disk and distance-based decay:
      maskb.resize(itsInputDims.getVal(), true); // create empty mask
      drawDisk(maskb, win.p, itsFOAradius.getVal(), byte(255));
    }

    maskb = chamfer34(maskb); // introduce a distance-based spatial decay
    mask = maskb;             // convert to float; range is 0..255
    mask = rescale(mask, sm.getDims()); // downsize to saliency-map resolution
    mask = binaryReverse(mask, 255.0F); // high values on/near the object
    mask = squash(mask, 0.0F, 0.0F, 128.0F, 0.25F, 255.0F, 1.0F); // squash into [0..1]
    maskedsm = sm * mask;
  } else {
    // we are not object-based and want to transfer the whole map:
    maskedsm = sm;
    mask.resize(sm.getDims()); mask.clear(1.0F);
  }

  // now let's take the max between our current buffer and our masked
  // sm, after shifting the masked sm to world-centered coordinates.
  // We start by computing the world coords of the top-left corner of
  // the masked sm, at maplevel:
  Point2D<int> tl = retinalToBuffer(Point2D<int>(0, 0));
  Image<float> newbuf(itsBuffer.getDims(), ZEROS);
  pasteImage(newbuf, maskedsm, 0.0F, tl);

  // pointwise max keeps the strongest evidence seen so far at each location:
  itsBuffer = takeMax(itsBuffer, newbuf);

  // apply one iteration of our internal dynamics:
  internalDynamics();

  // for display purposes, keep a copy of the mask used to transfer saliency:
  itsSaliencyMask = Image<byte>(mask * 255.0F);

  // the internal dynamics of the buffer are taken care of in evolve()
}

// ######################################################################
// Return the last saliency-transfer mask (scaled to 0..255), for display.
Image<byte> VisualBufferStd::getSaliencyMask() const
{ return itsSaliencyMask; }

// ######################################################################
// Run one step of internal dynamics, at most once per itsTimePeriod
// of simulation time.
void VisualBufferStd::evolve(SimEventQueue& q)
{
  itsTime = q.now();
  // apply one iteration of our internal dynamics if the time has come:
  if (itsTime - itsLastInteractTime >= itsTimePeriod.getVal())
    internalDynamics(); // will update itsLastInteractTime

  // post our buffer
  // NOTE(review): no posting code follows the comment above -- the
  // buffer apparently is only available via getBuffer(); confirm
  // whether a SimEvent post was intended here.
}

// ######################################################################
// Inhibit a disk at loc (buffer coordinates); radius is the FOA radius
// scaled down by the map level.
void VisualBufferStd::inhibit(const Point2D<int>& loc)
{
  Image<float> mask(itsBuffer.getDims(), ZEROS);
  drawDisk(mask, loc, itsFOAradius.getVal() >> itsLevelSpec.getVal().mapLevel(), 1.0F);
  inhibit(mask);
}

// ######################################################################
// Multiplicatively inhibit the buffer: values of mask near 1 suppress,
// values near 0 leave the buffer untouched.
void VisualBufferStd::inhibit(const Image<float>& mask)
{
  Image<float> inhib = binaryReverse(mask, 1.0F); // inhib = 1 - mask
  itsBuffer *= inhib;
}

// ######################################################################
// Return the centroid of the salient blob closest to p (buffer coords).
Point2D<int> VisualBufferStd::findMostInterestingTarget(const Point2D<int>& p)
{
  // let's start by cutting-off locations below a given fraction of
  // the max activation over the buffer, and binarize the rest:
  Image<float> buf = itsBuffer; float mi, ma; getMinMax(buf, mi, ma);
  Image<byte> bin = makeBinary(buf, ma * 0.25F, 0, 1);

  // now let's find the cluster that is closest to our current eye
  // position. For that, we compute a distance map from a single pixel
  // at current eye position, multiply it by our binary mask, and look
  // for the smallest non-zero value:
  Image<byte> dmap(bin.getDims(), ZEROS);
  dmap.setVal(p, 255); dmap = chamfer34(dmap);
  Image<byte> prod = bin * dmap;
  inplaceReplaceVal(prod, 0, 255); // eliminate zero distances outside our blobs

  Point2D<int> minp; byte minval;
  findMin(prod, minp, minval);

  // minp belongs to our closest cluster. Let's segment that cluster
  // out so that we can compute its center of gravity:
  Image<byte> obj; flood(bin, obj, minp, byte(1), byte(1));
  Image<float> objf = itsBuffer * obj;

  return centroid(objf);
}

// ######################################################################
// Return the above-threshold local maximum of the (smoothed) buffer
// that lies closest to p (buffer coords); (-1, -1) if none is found.
Point2D<int> VisualBufferStd::findMostInterestingTargetLocMax(const Point2D<int>& p)
{
  // let's start by getting an idea of the range of values in our
  // buffer after we blur it a bit:
  Image<float> buf = lowPass9(itsBuffer);
  float mi, ma; getMinMax(buf, mi, ma);
  float thresh = ma * 0.25F;

  // let's go over the image and find the local max that is above a
  // threshold and closest to our current fixation.
  // The code here is similar to what we have in maxNormalizeStd() to
  // find local maxes, except that here we want to enforce a true
  // local max, not a ridge point (hence we use strict inequalities):
  int w = buf.getWidth(), h = buf.getHeight();
  Point2D<int> best(-1, -1); float bestdist(1.0e10);
  // interior pixels only (1..w-2, 1..h-2) so all 4 neighbors exist:
  for (int j = 1; j < h - 1; j ++)
    for (int i = 1; i < w - 1; i ++)
      {
        int index = i + w * j;
        float val = buf.getVal(index);
        if (val >= thresh &&                // strong enough activity
            val > buf.getVal(index - w) &&  // local max vs. pixel above
            val > buf.getVal(index + w) &&  // local max vs. pixel below
            val > buf.getVal(index - 1) &&  // local max vs. pixel left
            val > buf.getVal(index + 1) &&  // local max vs. pixel right
            p.distance(Point2D<int>(i, j)) < bestdist) // closest to eye
          { best.i = i; best.j = j; bestdist = p.distance(best); }
      }
  return best; // stays (-1, -1) when no qualifying local max exists
}

// ######################################################################
// Return a copy of the full buffer.
Image<float> VisualBufferStd::getBuffer() const
{ return itsBuffer; }

// ######################################################################
// Convert a point from retinal coordinates to buffer coordinates,
// using the current retinal offset and the map level (see Retina.H).
Point2D<int> VisualBufferStd::retinalToBuffer(const Point2D<int>& p) const
{
  return retinalToVisualBuffer(p, itsRetinaOffset, itsLevelSpec.getVal().mapLevel(), itsSMdims, itsBuffer.getDims());
}

// ######################################################################
// Inverse of retinalToBuffer().
Point2D<int> VisualBufferStd::bufferToRetinal(const Point2D<int>& p) const
{
  return visualBufferToRetinal(p, itsRetinaOffset, itsLevelSpec.getVal().mapLevel(), itsSMdims, itsBuffer.getDims());
}

// ######################################################################
// True if the buffer operates in object-based transfer mode.
bool VisualBufferStd::isObjectBased() const
{ return itsObjectBased.getVal(); }

// ######################################################################
// One iteration of internal dynamics: apply maxnorm competition while
// preserving the buffer's original value range, then an optional
// multiplicative temporal decay. Records the interaction time.
void VisualBufferStd::internalDynamics()
{
  float mi, ma; getMinMax(itsBuffer, mi, ma);
  itsBuffer = maxNormalize(itsBuffer, 0.0F, 0.0F, itsNormType.getVal());
  inplaceNormalize(itsBuffer, mi, ma); // restore pre-maxnorm range
  if (itsDecayFactor.getVal() != 1.0F) itsBuffer *= itsDecayFactor.getVal();
  itsLastInteractTime = itsTime;
}


// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */