00001 /*!@file AppNeuro/app-perceptual-grouping.C Generates perceptual grouping of features 00002 */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00006 // University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/AppNeuro/app-perceptual-grouping.C $ 00035 // $Id: app-perceptual-grouping.C 10827 2009-02-11 09:40:02Z itti $ 00036 // 00037 00038 #include "Channels/ChannelVisitor.H" 00039 #include "Channels/ComplexChannel.H" 00040 #include "Channels/SingleChannel.H" 00041 #include "Component/ModelManager.H" 00042 #include "Media/FrameSeries.H" 00043 #include "Image/ColorOps.H" 00044 #include "Image/MathOps.H" 00045 #include "Image/Pixels.H" 00046 #include "Image/ShapeOps.H" 00047 #include "Media/MediaSimEvents.H" 00048 #include "Neuro/StdBrain.H" 00049 #include "Neuro/NeuroSimEvents.H" 00050 #include "Neuro/VisualCortex.H" 00051 #include "Simulation/SimEventQueueConfigurator.H" 00052 #include "Raster/Raster.H" 00053 #include "Util/Types.H" 00054 00055 namespace 00056 { 00057 //! Get the coefficients (gain factors) for perceptual feature grouping 00058 class CoeffGetter : public ChannelVisitor 00059 { 00060 std::vector<double>& itsCoeff; 00061 00062 public: 00063 CoeffGetter(std::vector<double>& c) : itsCoeff(c) {} 00064 00065 virtual ~CoeffGetter() {} 00066 00067 virtual void visitChannelBase(ChannelBase& chan) 00068 { 00069 LFATAL("don't know how to handle %s", chan.tagName().c_str()); 00070 } 00071 00072 virtual void visitSingleChannel(SingleChannel& chan) 00073 { 00074 for (uint idx = 0; idx < chan.numSubmaps(); idx ++) 00075 { 00076 // get center and surround scales for this submap index: 00077 uint clev = 0, slev = 0; 00078 chan.getLevelSpec().indexToCS(idx, clev, slev); 00079 // find the coefficient for this submap as a function of the 00080 // amount of signal present 00081 double sum = 0.0; 00082 Image<float> submap = chan.getSubmap(idx); 00083 float min = 0.0f, max = 0.0f, avg = 0.0f; 00084 getMinMaxAvg(submap, min, max, avg); 00085 /*uint w = submap.getWidth(), h = submap.getHeight(); 00086 for (uint i = 0; i < w; i++) 00087 for (uint j = 0; j < h; j++){ 00088 double salience = submap.getVal(i,j); 00089 sum += salience*salience*salience; 00090 } 00091 */ 00092 sum = max - avg; 00093 LINFO("%s(%d,%d): -- amount of signal = %lf", 00094 chan.tagName().c_str(), clev, slev, sum); 00095 itsCoeff.push_back(sum); 00096 } 00097 } 00098 00099 virtual void visitComplexChannel(ComplexChannel& chan) 00100 { 00101 for (uint i = 0; i < chan.numChans(); i++) 00102 chan.subChan(i)->accept(*this); 00103 } 00104 }; 00105 00106 //! Set the coefficients (gain factors) for perceptual feature grouping 00107 class CoeffSetter : public ChannelVisitor 00108 { 00109 const std::vector<double>& itsCoeff; 00110 uint itsIndex; 00111 00112 public: 00113 CoeffSetter(const std::vector<double>& c) : itsCoeff(c), itsIndex(0) {} 00114 00115 virtual ~CoeffSetter() {} 00116 00117 virtual void visitChannelBase(ChannelBase& chan) 00118 { 00119 LFATAL("don't know how to handle %s", chan.tagName().c_str()); 00120 } 00121 00122 virtual void visitSingleChannel(SingleChannel& chan) 00123 { 00124 const uint num = chan.numSubmaps(); 00125 for (uint i = 0; i < num; ++i) 00126 { 00127 uint clev = 0, slev = 0; 00128 chan.getLevelSpec().indexToCS(i, clev, slev); 00129 LFATAL("FIXME"); 00130 /////chan.setCoeff(clev, slev, itsCoeff[itsIndex]); 00131 ++itsIndex; 00132 } 00133 } 00134 00135 virtual void visitComplexChannel(ComplexChannel& chan) 00136 { 00137 for (uint i = 0; i < chan.numChans(); ++i) 00138 chan.subChan(i)->accept(*this); 00139 } 00140 }; 00141 00142 //! Compute the percept by grouping features 00143 class PerceptualGrouping : public ChannelVisitor 00144 { 00145 Image<float> itsPercept; 00146 00147 public: 00148 PerceptualGrouping() {} 00149 00150 virtual ~PerceptualGrouping() {} 00151 00152 Image<float> getPercept() const { return itsPercept; } 00153 00154 virtual void visitChannelBase(ChannelBase& chan) 00155 { 00156 LFATAL("don't know how to handle %s", chan.tagName().c_str()); 00157 } 00158 00159 virtual void visitSingleChannel(SingleChannel& chan) 00160 { 00161 ASSERT(itsPercept.initialized() == false); 00162 00163 itsPercept = Image<float>(chan.getMapDims(), ZEROS); 00164 00165 // compute a weighted sum of raw feature maps at all levels: 00166 for (uint idx = 0; idx < chan.getLevelSpec().maxIndex(); ++idx) 00167 { 00168 LFATAL("FIXME"); 00169 const float w = 0.0;////////float(chan.getCoeff(idx)); // weight for that submap 00170 if (w != 0.0f) 00171 { 00172 Image<float> submap = chan.getRawCSmap(idx); // get raw map 00173 if (w != 1.0f) submap *= w; // weigh the submap 00174 // resize submap to fixed scale if necessary: 00175 if (submap.getWidth() > chan.getMapDims().w()) 00176 submap = downSize(submap, chan.getMapDims()); 00177 else if (submap.getWidth() < chan.getMapDims().w()) 00178 submap = rescale(submap, chan.getMapDims()); 00179 itsPercept += submap; // add submap to our sum 00180 } 00181 } 00182 } 00183 00184 virtual void visitComplexChannel(ComplexChannel& chan) 00185 { 00186 ASSERT(itsPercept.initialized() == false); 00187 00188 itsPercept = Image<float>(chan.getMapDims(), ZEROS); 00189 00190 for (uint i = 0; i < chan.numChans(); ++i) 00191 { 00192 if (chan.getSubchanTotalWeight(i) == 0.0) continue; 00193 if (chan.subChan(i)->outputAvailable() == false) continue; 00194 PerceptualGrouping g; 00195 chan.subChan(i)->accept(g); 00196 Image<float> subChanOut = g.getPercept(); 00197 const float w = float(chan.getSubchanTotalWeight(i)); 00198 if (w != 1.0f) subChanOut *= w; 00199 LINFO("%s grouping weight %f", 00200 chan.subChan(i)->tagName().c_str(), w); 00201 itsPercept += downSizeClean(subChanOut, itsPercept.getDims()); 00202 } 00203 } 00204 }; 00205 00206 } 00207 00208 int main(const int argc, const char **argv) 00209 { 00210 MYLOGVERB = LOG_INFO; // suppress debug messages 00211 00212 // Instantiate a ModelManager: 00213 ModelManager manager("Attention Model"); 00214 00215 // Instantiate our various ModelComponents: 00216 nub::soft_ref<SimEventQueueConfigurator> 00217 seqc(new SimEventQueueConfigurator(manager)); 00218 manager.addSubComponent(seqc); 00219 00220 nub::soft_ref<InputFrameSeries> ifs(new InputFrameSeries(manager)); 00221 manager.addSubComponent(ifs); 00222 00223 nub::soft_ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager)); 00224 manager.addSubComponent(ofs); 00225 00226 nub::soft_ref<StdBrain> brain(new StdBrain(manager)); 00227 manager.addSubComponent(brain); 00228 00229 // Parse command-line: 00230 if (manager.parseCommandLine(argc, argv, "", 0, 0) == false) 00231 return(1); 00232 00233 nub::soft_ref<SimEventQueue> seq = seqc->getQ(); 00234 00235 // let's get all our ModelComponent instances started: 00236 manager.start(); 00237 00238 // main loop: 00239 bool first=true; std::vector<double> coeff; int count = 0; 00240 // for perceptual grouping 00241 while(1) { 00242 00243 // read new image in? 00244 const FrameState is = ifs->update(seq->now()); 00245 if (is == FRAME_COMPLETE) break; // done 00246 if (is == FRAME_NEXT || is == FRAME_FINAL) // new frame 00247 { 00248 Image< PixRGB<byte> > input = ifs->readRGB(); 00249 00250 // empty image signifies end-of-stream 00251 if (input.initialized()) 00252 { 00253 rutz::shared_ptr<SimEventInputFrame> 00254 e(new SimEventInputFrame(brain.get(), GenericFrame(input), 0)); 00255 seq->post(e); // post the image to the brain 00256 00257 // show memory usage if in debug mode: 00258 if (MYLOGVERB >= LOG_DEBUG) 00259 SHOWMEMORY("MEMORY USAGE: frame %d t=%.1fms", ifs->frame(), 00260 seq->now().msecs()); 00261 } 00262 } 00263 00264 // evolve brain: 00265 seq->evolve(); 00266 00267 00268 // write outputs or quit? 00269 bool gotcovert = false; 00270 if (seq->check<SimEventWTAwinner>(0)) gotcovert = true; 00271 const FrameState os = ofs->update(seq->now(), gotcovert); 00272 00273 if (os == FRAME_NEXT || os == FRAME_FINAL) // new FOA 00274 { 00275 brain->save(SimModuleSaveInfo(ofs, *seq)); 00276 00277 // arbitrary: every time we have a winner, change the percept 00278 // get the gain factors based on amount of signal present 00279 // within the feature map 00280 LINFO ("perceptual feature grouping"); 00281 if (first) { 00282 LINFO ("first iteration...getting coefficients"); 00283 00284 LFATAL("fixme"); 00285 00286 ////// CoeffGetter g(coeff); 00287 /////// brain->getVC()->accept(g); 00288 first = false; 00289 } 00290 LINFO ("obtained the coefficients..."); 00291 // for perceptual feature grouping, iterate through the gain 00292 // factors, setting one of them to 1.0 and the rest to 0.0 in 00293 // decsending order 00294 uint idx = 0; // idx corresponding to the max 00295 for(uint i = 1; i < coeff.size(); i++) 00296 if (coeff[i] > coeff[idx]) 00297 idx = i; 00298 std::vector<double> perceptCoeff; 00299 for(uint i = 0; i < coeff.size(); i++) 00300 perceptCoeff.push_back(0.0); 00301 perceptCoeff[idx] = 1.0; 00302 { 00303 LFATAL("fixme"); 00304 ////////CoeffSetter s(perceptCoeff); 00305 // set the new coefficients on the dummy VC 00306 ///////brain->getVC()->accept(s); 00307 } 00308 LINFO ("iteration %d: gain of chan %d = 1.0", count+1, idx); 00309 // prevent current max from being chosen again 00310 coeff[idx] = -1.0; count++; 00311 00312 // compute the percept 00313 LFATAL("fixme"); 00314 PerceptualGrouping pg; 00315 ///////brain->getVC()->accept(pg); 00316 Image<float> percept = pg.getPercept(); 00317 char out[10]; 00318 sprintf(out,"%d_chan%d",count,idx); 00319 //Raster::WriteFloat(percept, FLOAT_NORM_0_255, sformat("percept_%s.pgm",out)); 00320 inplaceNormalize(percept, 0.0f, 1.0f); 00321 Image< PixRGB<byte> > input = ifs->readRGB(); 00322 percept = rescale(percept, input.getDims()); 00323 input = input * percept; 00324 normalizeC (input, 0, 255); 00325 Raster::WriteRGB (input, sformat("percept_%s.ppm",out)); 00326 00327 } 00328 00329 if (os == FRAME_FINAL) 00330 break; 00331 00332 // if we displayed a bunch of images, let's pause: 00333 if (ifs->shouldWait() || ofs->shouldWait()) 00334 Raster::waitForKey(); 00335 } 00336 00337 // stop all our ModelComponents 00338 manager.stop(); 00339 00340 // all done! 00341 return 0; 00342 } 00343 00344 // ###################################################################### 00345 /* So things look consistent in everyone's emacs... */ 00346 /* Local Variables: */ 00347 /* indent-tabs-mode: nil */ 00348 /* End: */