app-perceptual-grouping.C

Go to the documentation of this file.
00001 /*!@file AppNeuro/app-perceptual-grouping.C  Generates perceptual grouping of features
00002  */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/AppNeuro/app-perceptual-grouping.C $
00035 // $Id: app-perceptual-grouping.C 10827 2009-02-11 09:40:02Z itti $
00036 //
00037 
00038 #include "Channels/ChannelVisitor.H"
00039 #include "Channels/ComplexChannel.H"
00040 #include "Channels/SingleChannel.H"
00041 #include "Component/ModelManager.H"
00042 #include "Media/FrameSeries.H"
00043 #include "Image/ColorOps.H"
00044 #include "Image/MathOps.H"
00045 #include "Image/Pixels.H"
00046 #include "Image/ShapeOps.H"
00047 #include "Media/MediaSimEvents.H"
00048 #include "Neuro/StdBrain.H"
00049 #include "Neuro/NeuroSimEvents.H"
00050 #include "Neuro/VisualCortex.H"
00051 #include "Simulation/SimEventQueueConfigurator.H"
00052 #include "Raster/Raster.H"
00053 #include "Util/Types.H"
00054 
00055 namespace
00056 {
00057   //! Get the coefficients (gain factors) for perceptual feature grouping
00058   class CoeffGetter : public ChannelVisitor
00059   {
00060     std::vector<double>& itsCoeff;
00061 
00062   public:
00063     CoeffGetter(std::vector<double>& c) : itsCoeff(c) {}
00064 
00065     virtual ~CoeffGetter() {}
00066 
00067     virtual void visitChannelBase(ChannelBase& chan)
00068     {
00069       LFATAL("don't know how to handle %s", chan.tagName().c_str());
00070     }
00071 
00072     virtual void visitSingleChannel(SingleChannel& chan)
00073     {
00074       for (uint idx = 0; idx < chan.numSubmaps(); idx ++)
00075         {
00076           // get center and surround scales for this submap index:
00077           uint clev = 0, slev = 0;
00078           chan.getLevelSpec().indexToCS(idx, clev, slev);
00079           // find the coefficient for this submap as a function of the
00080           // amount of signal present
00081           double sum = 0.0;
00082           Image<float> submap = chan.getSubmap(idx);
00083           float min = 0.0f, max = 0.0f, avg = 0.0f;
00084           getMinMaxAvg(submap, min, max, avg);
00085           /*uint w = submap.getWidth(), h = submap.getHeight();
00086             for (uint i = 0; i < w; i++)
00087             for (uint j = 0; j < h; j++){
00088             double salience = submap.getVal(i,j);
00089             sum += salience*salience*salience;
00090             }
00091           */
00092           sum = max - avg;
00093           LINFO("%s(%d,%d): -- amount of signal = %lf",
00094                 chan.tagName().c_str(), clev, slev, sum);
00095           itsCoeff.push_back(sum);
00096         }
00097     }
00098 
00099     virtual void visitComplexChannel(ComplexChannel& chan)
00100     {
00101       for (uint i = 0; i < chan.numChans(); i++)
00102         chan.subChan(i)->accept(*this);
00103     }
00104   };
00105 
00106   //! Set the coefficients (gain factors) for perceptual feature grouping
00107   class CoeffSetter : public ChannelVisitor
00108   {
00109     const std::vector<double>& itsCoeff;
00110     uint itsIndex;
00111 
00112   public:
00113     CoeffSetter(const std::vector<double>& c) : itsCoeff(c), itsIndex(0) {}
00114 
00115     virtual ~CoeffSetter() {}
00116 
00117     virtual void visitChannelBase(ChannelBase& chan)
00118     {
00119       LFATAL("don't know how to handle %s", chan.tagName().c_str());
00120     }
00121 
00122     virtual void visitSingleChannel(SingleChannel& chan)
00123     {
00124       const uint num = chan.numSubmaps();
00125       for (uint i = 0; i < num; ++i)
00126         {
00127           uint clev = 0, slev = 0;
00128           chan.getLevelSpec().indexToCS(i, clev, slev);
00129           LFATAL("FIXME");
00130           /////chan.setCoeff(clev, slev, itsCoeff[itsIndex]);
00131           ++itsIndex;
00132         }
00133     }
00134 
00135     virtual void visitComplexChannel(ComplexChannel& chan)
00136     {
00137       for (uint i = 0; i < chan.numChans(); ++i)
00138         chan.subChan(i)->accept(*this);
00139     }
00140   };
00141 
00142   //! Compute the percept by grouping features
00143   class PerceptualGrouping : public ChannelVisitor
00144   {
00145     Image<float> itsPercept;
00146 
00147   public:
00148     PerceptualGrouping() {}
00149 
00150     virtual ~PerceptualGrouping() {}
00151 
00152     Image<float> getPercept() const { return itsPercept; }
00153 
00154     virtual void visitChannelBase(ChannelBase& chan)
00155     {
00156       LFATAL("don't know how to handle %s", chan.tagName().c_str());
00157     }
00158 
00159     virtual void visitSingleChannel(SingleChannel& chan)
00160     {
00161       ASSERT(itsPercept.initialized() == false);
00162 
00163       itsPercept = Image<float>(chan.getMapDims(), ZEROS);
00164 
00165       // compute a weighted sum of raw feature maps at all levels:
00166       for (uint idx = 0; idx < chan.getLevelSpec().maxIndex(); ++idx)
00167         {
00168           LFATAL("FIXME");
00169           const float w = 0.0;////////float(chan.getCoeff(idx));     // weight for that submap
00170           if (w != 0.0f)
00171             {
00172               Image<float> submap = chan.getRawCSmap(idx); // get raw map
00173               if (w != 1.0f) submap *= w;            // weigh the submap
00174               // resize submap to fixed scale if necessary:
00175               if (submap.getWidth() > chan.getMapDims().w())
00176                 submap = downSize(submap, chan.getMapDims());
00177               else if (submap.getWidth() < chan.getMapDims().w())
00178                 submap = rescale(submap, chan.getMapDims());
00179               itsPercept += submap;                  // add submap to our sum
00180             }
00181         }
00182     }
00183 
00184     virtual void visitComplexChannel(ComplexChannel& chan)
00185     {
00186       ASSERT(itsPercept.initialized() == false);
00187 
00188       itsPercept = Image<float>(chan.getMapDims(), ZEROS);
00189 
00190       for (uint i = 0; i < chan.numChans(); ++i)
00191         {
00192           if (chan.getSubchanTotalWeight(i) == 0.0) continue;
00193           if (chan.subChan(i)->outputAvailable() == false) continue;
00194           PerceptualGrouping g;
00195           chan.subChan(i)->accept(g);
00196           Image<float> subChanOut = g.getPercept();
00197           const float w = float(chan.getSubchanTotalWeight(i));
00198           if (w != 1.0f) subChanOut *= w;
00199           LINFO("%s grouping weight %f",
00200                 chan.subChan(i)->tagName().c_str(), w);
00201           itsPercept += downSizeClean(subChanOut, itsPercept.getDims());
00202         }
00203     }
00204   };
00205 
00206 }
00207 
00208 int main(const int argc, const char **argv)
00209 {
00210   MYLOGVERB = LOG_INFO;  // suppress debug messages
00211 
00212   // Instantiate a ModelManager:
00213   ModelManager manager("Attention Model");
00214 
00215   // Instantiate our various ModelComponents:
00216   nub::soft_ref<SimEventQueueConfigurator>
00217     seqc(new SimEventQueueConfigurator(manager));
00218   manager.addSubComponent(seqc);
00219 
00220   nub::soft_ref<InputFrameSeries> ifs(new InputFrameSeries(manager));
00221   manager.addSubComponent(ifs);
00222 
00223   nub::soft_ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager));
00224   manager.addSubComponent(ofs);
00225 
00226   nub::soft_ref<StdBrain> brain(new StdBrain(manager));
00227   manager.addSubComponent(brain);
00228 
00229   // Parse command-line:
00230   if (manager.parseCommandLine(argc, argv, "", 0, 0) == false)
00231     return(1);
00232 
00233   nub::soft_ref<SimEventQueue> seq = seqc->getQ();
00234 
00235   // let's get all our ModelComponent instances started:
00236   manager.start();
00237 
00238   // main loop:
00239   bool first=true; std::vector<double> coeff; int count = 0;
00240                 // for perceptual grouping
00241   while(1) {
00242 
00243     // read new image in?
00244     const FrameState is = ifs->update(seq->now());
00245     if (is == FRAME_COMPLETE) break; // done
00246     if (is == FRAME_NEXT || is == FRAME_FINAL) // new frame
00247       {
00248         Image< PixRGB<byte> > input = ifs->readRGB();
00249 
00250         // empty image signifies end-of-stream
00251         if (input.initialized())
00252           {
00253             rutz::shared_ptr<SimEventInputFrame>
00254               e(new SimEventInputFrame(brain.get(), GenericFrame(input), 0));
00255             seq->post(e); // post the image to the brain
00256 
00257             // show memory usage if in debug mode:
00258             if (MYLOGVERB >= LOG_DEBUG)
00259               SHOWMEMORY("MEMORY USAGE: frame %d t=%.1fms", ifs->frame(),
00260                          seq->now().msecs());
00261           }
00262       }
00263 
00264     // evolve brain:
00265     seq->evolve();
00266 
00267 
00268     // write outputs or quit?
00269     bool gotcovert = false;
00270     if (seq->check<SimEventWTAwinner>(0)) gotcovert = true;
00271     const FrameState os = ofs->update(seq->now(), gotcovert);
00272 
00273     if (os == FRAME_NEXT || os == FRAME_FINAL) // new FOA
00274       {
00275         brain->save(SimModuleSaveInfo(ofs, *seq));
00276 
00277         // arbitrary: every time we have a winner, change the percept
00278         // get the gain factors based on amount of signal present
00279         // within the feature map
00280         LINFO ("perceptual feature grouping");
00281         if (first) {
00282           LINFO ("first iteration...getting coefficients");
00283 
00284           LFATAL("fixme");
00285 
00286           //////          CoeffGetter g(coeff);
00287           /////// brain->getVC()->accept(g);
00288           first = false;
00289         }
00290         LINFO ("obtained the coefficients...");
00291         // for perceptual feature grouping, iterate through the gain
00292         // factors, setting one of them to 1.0 and the rest to 0.0 in
00293         // decsending order
00294         uint idx = 0; // idx corresponding to the max
00295         for(uint i = 1; i < coeff.size(); i++)
00296           if (coeff[i] > coeff[idx])
00297             idx = i;
00298         std::vector<double> perceptCoeff;
00299         for(uint i = 0; i < coeff.size(); i++)
00300           perceptCoeff.push_back(0.0);
00301         perceptCoeff[idx] = 1.0;
00302         {
00303           LFATAL("fixme");
00304           ////////CoeffSetter s(perceptCoeff);
00305           // set the new coefficients on the dummy VC
00306           ///////brain->getVC()->accept(s);
00307         }
00308         LINFO ("iteration %d: gain of chan %d = 1.0", count+1, idx);
00309         // prevent current max from being chosen again
00310         coeff[idx] = -1.0; count++;
00311 
00312         // compute the percept
00313         LFATAL("fixme");
00314         PerceptualGrouping pg;
00315         ///////brain->getVC()->accept(pg);
00316         Image<float> percept = pg.getPercept();
00317         char out[10];
00318         sprintf(out,"%d_chan%d",count,idx);
00319         //Raster::WriteFloat(percept, FLOAT_NORM_0_255, sformat("percept_%s.pgm",out));
00320         inplaceNormalize(percept, 0.0f, 1.0f);
00321         Image< PixRGB<byte> > input = ifs->readRGB();
00322         percept = rescale(percept, input.getDims());
00323         input = input * percept;
00324         normalizeC (input, 0, 255);
00325         Raster::WriteRGB (input, sformat("percept_%s.ppm",out));
00326 
00327       }
00328 
00329     if (os == FRAME_FINAL)
00330       break;
00331 
00332     // if we displayed a bunch of images, let's pause:
00333     if (ifs->shouldWait() || ofs->shouldWait())
00334       Raster::waitForKey();
00335   }
00336 
00337   // stop all our ModelComponents
00338   manager.stop();
00339 
00340   // all done!
00341   return 0;
00342 }
00343 
00344 // ######################################################################
00345 /* So things look consistent in everyone's emacs... */
00346 /* Local Variables: */
00347 /* indent-tabs-mode: nil */
00348 /* End: */