/*!@file Neuro/VisualBuffer.C A saliency buffer with internal dynamics */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Laurent Itti <itti@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/VisualBuffer.C $
// $Id: VisualBuffer.C 13065 2010-03-28 00:01:00Z itti $
//

#include "Neuro/VisualBuffer.H"

#include "Channels/ChannelOpts.H"
#include "Component/OptionManager.H"
#include "Image/DrawOps.H"
#include "Image/FilterOps.H"
#include "Image/MathOps.H"
#include "Image/ShapeOps.H"
#include "Image/Transforms.H"
#include "Image/fancynorm.H"
#include "Media/MediaOpts.H"
#include "Neuro/NeuroOpts.H"
#include "Neuro/Retina.H"
#include "Simulation/SimEventQueue.H"
#include "Util/log.H"

// ######################################################################
// VisualBuffer: abstract base; simply forwards construction to SimModule.
VisualBuffer::VisualBuffer(OptionManager& mgr,
                           const std::string& descrName,
                           const std::string& tagName) :
  SimModule(mgr, descrName, tagName)
{ }

// ######################################################################
VisualBuffer::~VisualBuffer()
{ }

// ######################################################################
// VisualBufferStub: inert implementation, selected when no buffer is
// desired (types "None" / "Stub" below).
VisualBufferStub::VisualBufferStub(OptionManager& mgr,
                                   const std::string& descrName,
                                   const std::string& tagName) :
  VisualBuffer(mgr, descrName, tagName)
{ }

// ######################################################################
VisualBufferStub::~VisualBufferStub()
{ }

// ######################################################################
// VisualBufferConfigurator: picks, at option-parsing time, which
// concrete VisualBuffer implementation to instantiate, driven by the
// OPT_VisualBufferType command-line option.
VisualBufferConfigurator::
VisualBufferConfigurator(OptionManager& mgr,
                         const std::string& descrName,
                         const std::string& tagName) :
  ModelComponent(mgr, descrName, tagName),
  itsVBtype(&OPT_VisualBufferType, this),
  itsVB(new VisualBufferStub(mgr)) // default to a stub until paramChanged() selects a type
{
  addSubComponent(itsVB);
}

// ######################################################################
VisualBufferConfigurator::~VisualBufferConfigurator()
{ }

// ######################################################################
// Return the currently-selected VisualBuffer instance.
nub::ref<VisualBuffer> VisualBufferConfigurator::getVB() const
{
  return itsVB;
}

// ######################################################################
// React to option changes; when the buffer-type option changes, swap
// the current VisualBuffer subcomponent for a freshly-built one of the
// requested type.
void VisualBufferConfigurator::paramChanged(ModelParamBase* const param,
                                            const bool valueChanged,
                                            ParamClient::ChangeStatus* status)
{
  ModelComponent::paramChanged(param, valueChanged, status);

  // was that a change of our baby's name?
  if (param == &itsVBtype) {
    // if we had one, let's unregister it (when we later reset() the
    // nub::ref, the current VisualBuffer will unexport its
    // command-line options):
    removeSubComponent(*itsVB);

    // instantiate a VB of the appropriate type:
    if (itsVBtype.getVal().compare("None") == 0 ||
        itsVBtype.getVal().compare("Stub") == 0) // no VB
      itsVB.reset(new VisualBufferStub(getManager()));
    else if (itsVBtype.getVal().compare("Std") == 0) // standard
      itsVB.reset(new VisualBufferStd(getManager()));
    else
      LFATAL("Unknown VB type %s", itsVBtype.getVal().c_str());

    // add our baby as a subcomponent of us so that it will become
    // linked to the manager through us (hopefully we are registered
    // with the manager), which in turn will allow it to export its
    // command-line options and get configured:
    addSubComponent(itsVB);

    // tell the controller to export its options:
    itsVB->exportOptions(MC_RECURSE);

    // some info message:
    LINFO("Selected VB of type %s", itsVBtype.getVal().c_str());
  }
}

// ######################################################################
// VisualBufferStd: the standard implementation. Registers simulation
// callbacks for retina images (to track the retinal offset) and for
// attention guidance map outputs (the main input path).
VisualBufferStd::VisualBufferStd(OptionManager& mgr,
                                 const std::string& descrName,
                                 const std::string& tagName) :
  VisualBuffer(mgr, descrName, tagName),
  SIMCALLBACK_INIT(SimEventAttentionGuidanceMapOutput),
  SIMCALLBACK_INIT(SimEventRetinaImage),
  itsFOAradius(&OPT_FOAradius, this),
  itsIgnoreBoring(&OPT_VBignoreBoring, this), // see Neuro/NeuroOpts.{H,C}
  itsObjectBased(&OPT_VBobjectBased, this), // see Neuro/NeuroOpts.{H,C}
  itsBufferDims(&OPT_VBdims, this), // see Neuro/NeuroOpts.{H,C}
  itsLevelSpec(&OPT_LevelSpec, this), // see Channels/ChannelOpts.{H,C}
  itsInputDims(&OPT_InputFrameDims, this), // see Media/MediaOpts.{H,C}
  itsTimePeriod(&OPT_VBtimePeriod, this), // see Neuro/NeuroOpts.{H,C}
  itsDecayFactor(&OPT_VBdecayFactor, this),
  itsNormType(&OPT_VBmaxNormType, this), // see Neuro/NeuroOpts.{H,C}
  itsBuffer(), itsSMdims(), itsSaliencyMask(), itsTime(),
  itsLastInteractTime(), itsRetinaOffset(0, 0)
{ }

// ######################################################################
// start1: allocate the buffer now if its dims were given on the
// command line; otherwise it will be lazily sized from the first
// saliency map received by input().
void VisualBufferStd::start1()
{
  // initialize our buffer if we have valid dims for it:
  Dims d = itsBufferDims.getVal();
  if (d.isNonEmpty())
    {
      itsBuffer.resize(d, true);
      LINFO("Using buffer dims of (%d, %d)", d.w(), d.h());
    }
  LINFO("Using internal maxnorm of type %s, decay %f",
        itsNormType.getValString().c_str(), itsDecayFactor.getVal());
}

// ######################################################################
VisualBufferStd::~VisualBufferStd()
{ }

// ######################################################################
// Remember the current retinal offset so that retinal <-> buffer
// coordinate conversions (retinalToBuffer/bufferToRetinal) stay correct.
void VisualBufferStd::
onSimEventRetinaImage(SimEventQueue& q, rutz::shared_ptr<SimEventRetinaImage>& e)
{
  // just keep track of our retina offset
  itsRetinaOffset = e->offset();
}

// ######################################################################
// Main input path: on each new attention guidance map, either transfer
// the whole map into the buffer (map-based mode), or transfer only at
// covert attention shifts (object-based mode); then evolve one step.
void VisualBufferStd::
onSimEventAttentionGuidanceMapOutput(SimEventQueue& q, rutz::shared_ptr<SimEventAttentionGuidanceMapOutput>& e)
{
  // grab the agm:
  Image<float> agm = e->agm();

  // if our buffer is not object based, pass it the current saliency
  // map now (the winner location does not matter), otherwise, do it
  // only on every WTA winner:
  if (isObjectBased() == false)
    input(WTAwinner(Point2D<int>(0, 0), q.now(), 0.0, false), agm, Image<byte>());
  else if (SeC<SimEventWTAwinner> e = q.check<SimEventWTAwinner>(this))
    {
      const WTAwinner& win = e->winner();

      // Any output from the ShapeEstimator?
      Image<byte> foaMask;
      if (SeC<SimEventShapeEstimatorOutput> e = q.check<SimEventShapeEstimatorOutput>(this))
        foaMask = Image<byte>(e->smoothMask() * 255.0F);

      // all right, bufferize that stuff!
      input(win, agm, foaMask);
    }

  // evolve our internals one time step:
  this->evolve(q);
}

// ######################################################################
// Transfer saliency information into the buffer.
// win: current WTA winner (retinal coords; location ignored in
//   map-based mode); sm: saliency map; objmask: optional segmented
//   object mask from the ShapeEstimator (may be uninitialized, in
//   which case a disk of FOA radius is used in object-based mode).
void VisualBufferStd::input(const WTAwinner& win, const Image<float>& sm,
                            const Image<byte>& objmask)
{
  // update our sm dims:
  itsSMdims = sm.getDims();

  // if our buffer size has not been set yet, do it now, and use the
  // size of the sm as our buffer size:
  if (itsBuffer.initialized() == false) {
    itsBuffer.resize(itsSMdims, true);
    LINFO("Using buffer dims of (%d, %d)", itsSMdims.w(), itsSMdims.h());
  }

  // ignore boring attention shifts:
  if (win.boring && itsIgnoreBoring.getVal())
    { LINFO("Ignoring boring attention shift"); return; }

  // Let's build a multiplicative mask that we will apply to the sm
  // and that will determine what saliency information gets
  // transferred into the buffer. We have two modes here, either
  // object-based (only transfer attended object) or map-based
  // (transfer whole map). In addition, we have two sub-cases
  // depending on whether objmask is initialized (then use it to
  // define the object) or not (then use a disk).

  Image<float> maskedsm; // the sm masked by our transfer mask
  Image<float> mask;     // the saliency transfer mask, range [0..1]

  if (itsObjectBased.getVal()) {
    // Object-based model. We start by building a float mask at retinal
    // resolution and coordinates:
    Image<byte> maskb;

    if (objmask.initialized()) {
      // build our mask using the object definition passed to us:
      maskb = objmask; // get the (fuzzy-boundary) object shape
      inplaceLowThresh(maskb, byte(255)); // get tight object boundaries
    } else {
      // build our mask using a disk and distance-based decay:
      maskb.resize(itsInputDims.getVal(), true); // create empty mask
      drawDisk(maskb, win.p, itsFOAradius.getVal(), byte(255));
    }

    maskb = chamfer34(maskb); // introduce a distance-based spatial decay
    mask = maskb;             // convert to float; range is 0..255
    mask = rescale(mask, sm.getDims()); // downsize to saliency-map resolution
    mask = binaryReverse(mask, 255.0F); // high values on/near the object
    mask = squash(mask, 0.0F, 0.0F, 128.0F, 0.25F, 255.0F, 1.0F); // squash into [0..1]
    maskedsm = sm * mask;
  } else {
    // we are not object-based and want to transfer the whole map:
    maskedsm = sm;
    mask.resize(sm.getDims()); mask.clear(1.0F);
  }

  // now let's take the max between our current buffer and our masked
  // sm, after shifting the masked sm to world-centered coordinates.
  // We start by computing the world coords of the top-left corner of
  // the masked sm, at maplevel:
  Point2D<int> tl = retinalToBuffer(Point2D<int>(0, 0));
  Image<float> newbuf(itsBuffer.getDims(), ZEROS);
  pasteImage(newbuf, maskedsm, 0.0F, tl);

  // pointwise max keeps the strongest evidence seen so far at each location:
  itsBuffer = takeMax(itsBuffer, newbuf);

  // apply one iteration of our internal dynamics:
  internalDynamics();

  // for display purposes, keep a copy of the mask used to transfer saliency:
  itsSaliencyMask = Image<byte>(mask * 255.0F);

  // the internal dynamics of the buffer are taken care of in evolve()
}

// ######################################################################
// Return the last saliency-transfer mask (scaled to 0..255), for display.
Image<byte> VisualBufferStd::getSaliencyMask() const
{ return itsSaliencyMask; }

// ######################################################################
// Run one step of internal dynamics, at most once per itsTimePeriod
// of simulation time.
void VisualBufferStd::evolve(SimEventQueue& q)
{
  itsTime = q.now();
  // apply one iteration of our internal dynamics if the time has come:
  if (itsTime - itsLastInteractTime >= itsTimePeriod.getVal())
    internalDynamics(); // will update itsLastInteractTime

  // post our buffer
  // NOTE(review): no posting code follows the comment above -- the
  // buffer apparently is only available via getBuffer(); confirm
  // whether a SimEvent post was intended here.
}

// ######################################################################
// Inhibit a disk at loc (buffer coordinates); radius is the FOA radius
// scaled down by the map level.
void VisualBufferStd::inhibit(const Point2D<int>& loc)
{
  Image<float> mask(itsBuffer.getDims(), ZEROS);
  drawDisk(mask, loc, itsFOAradius.getVal() >> itsLevelSpec.getVal().mapLevel(), 1.0F);
  inhibit(mask);
}

// ######################################################################
// Multiplicatively inhibit the buffer: values of mask near 1 suppress,
// values near 0 leave the buffer untouched.
void VisualBufferStd::inhibit(const Image<float>& mask)
{
  Image<float> inhib = binaryReverse(mask, 1.0F); // inhib = 1 - mask
  itsBuffer *= inhib;
}

// ######################################################################
// Return the centroid of the salient blob closest to p (buffer coords).
Point2D<int> VisualBufferStd::findMostInterestingTarget(const Point2D<int>& p)
{
  // let's start by cutting-off locations below a given fraction of
  // the max activation over the buffer, and binarize the rest:
  Image<float> buf = itsBuffer; float mi, ma; getMinMax(buf, mi, ma);
  Image<byte> bin = makeBinary(buf, ma * 0.25F, 0, 1);

  // now let's find the cluster that is closest to our current eye
  // position. For that, we compute a distance map from a single pixel
  // at current eye position, multiply it by our binary mask, and look
  // for the smallest non-zero value:
  Image<byte> dmap(bin.getDims(), ZEROS);
  dmap.setVal(p, 255); dmap = chamfer34(dmap);
  Image<byte> prod = bin * dmap;
  inplaceReplaceVal(prod, 0, 255); // eliminate zero distances outside our blobs

  Point2D<int> minp; byte minval;
  findMin(prod, minp, minval);

  // minp belongs to our closest cluster. Let's segment that cluster
  // out so that we can compute its center of gravity:
  Image<byte> obj; flood(bin, obj, minp, byte(1), byte(1));
  Image<float> objf = itsBuffer * obj;

  return centroid(objf);
}

// ######################################################################
// Return the above-threshold local maximum of the (smoothed) buffer
// that lies closest to p (buffer coords); (-1, -1) if none is found.
Point2D<int> VisualBufferStd::findMostInterestingTargetLocMax(const Point2D<int>& p)
{
  // let's start by getting an idea of the range of values in our
  // buffer after we blur it a bit:
  Image<float> buf = lowPass9(itsBuffer);
  float mi, ma; getMinMax(buf, mi, ma);
  float thresh = ma * 0.25F;

  // let's go over the image and find the local max that is above a
  // threshold and closest to our current fixation.
  // The code here is similar to what we have in maxNormalizeStd() to
  // find local maxes, except that here we want to enforce a true
  // local max, not a ridge point (hence we use strict inequalities):
  int w = buf.getWidth(), h = buf.getHeight();
  Point2D<int> best(-1, -1); float bestdist(1.0e10);
  // interior pixels only (1..w-2, 1..h-2) so all 4 neighbors exist:
  for (int j = 1; j < h - 1; j ++)
    for (int i = 1; i < w - 1; i ++)
      {
        int index = i + w * j;
        float val = buf.getVal(index);
        if (val >= thresh &&                // strong enough activity
            val > buf.getVal(index - w) &&  // local max vs. pixel above
            val > buf.getVal(index + w) &&  // local max vs. pixel below
            val > buf.getVal(index - 1) &&  // local max vs. pixel left
            val > buf.getVal(index + 1) &&  // local max vs. pixel right
            p.distance(Point2D<int>(i, j)) < bestdist) // closest to eye
          { best.i = i; best.j = j; bestdist = p.distance(best); }
      }
  return best; // stays (-1, -1) when no qualifying local max exists
}

// ######################################################################
// Return a copy of the full buffer.
Image<float> VisualBufferStd::getBuffer() const
{ return itsBuffer; }

// ######################################################################
// Convert a point from retinal coordinates to buffer coordinates,
// using the current retinal offset and the map level (see Retina.H).
Point2D<int> VisualBufferStd::retinalToBuffer(const Point2D<int>& p) const
{
  return retinalToVisualBuffer(p, itsRetinaOffset, itsLevelSpec.getVal().mapLevel(), itsSMdims, itsBuffer.getDims());
}

// ######################################################################
// Inverse of retinalToBuffer().
Point2D<int> VisualBufferStd::bufferToRetinal(const Point2D<int>& p) const
{
  return visualBufferToRetinal(p, itsRetinaOffset, itsLevelSpec.getVal().mapLevel(), itsSMdims, itsBuffer.getDims());
}

// ######################################################################
// True if the buffer operates in object-based transfer mode.
bool VisualBufferStd::isObjectBased() const
{ return itsObjectBased.getVal(); }

// ######################################################################
// One iteration of internal dynamics: apply maxnorm competition while
// preserving the buffer's original value range, then an optional
// multiplicative temporal decay. Records the interaction time.
void VisualBufferStd::internalDynamics()
{
  float mi, ma; getMinMax(itsBuffer, mi, ma);
  itsBuffer = maxNormalize(itsBuffer, 0.0F, 0.0F, itsNormType.getVal());
  inplaceNormalize(itsBuffer, mi, ma); // restore pre-maxnorm range
  if (itsDecayFactor.getVal() != 1.0F) itsBuffer *= itsDecayFactor.getVal();
  itsLastInteractTime = itsTime;
}


// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */