/*!@file Neuro/AttentionGate.H Class declarations for the attention gate classes */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Laurent Itti <itti@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/AttentionGate.H $
// $Id: AttentionGate.H 10794 2009-02-08 06:21:09Z itti $
//

#ifndef ATTENTIONGATE_H_DEFINED
#define ATTENTIONGATE_H_DEFINED

#include "Component/ModelComponent.H"
#include "Component/ModelParam.H"
#include "Image/Image.H"
#include "Image/Pixels.H" // for PixRGB
#include "Media/MediaSimEvents.H"
#include "Neuro/NeuroSimEvents.H"
#include "Simulation/SimModule.H"
#include "Simulation/SimEvents.H"
#include "Util/SimTime.H"
#include "VFAT/segmentImageMC2.H"

#include <deque>
#include <string>
#include <vector>

//! Feature readout methods used by stage two
enum AG_METHODS_ENUM
{
  AG_CENTER,
  AG_MAX,
  AG_NORMALIZE,
};

class FrameOstream;
class ModelManager;
class SimTime;
class VisualCortex;

// ######################################################################
//! The Attention Gate Class
// ######################################################################
/*! This class models the first and second stages of the two-stage
    attention gating mechanism. The first stage decides what visual
    information will get through to the next stage. The second stage is
    not so much a gate as it is an integrator.

    Stage One: The first stage must account for both attention blocking and
               attention capture. For each frame, which parts of the image
               get through is affected by frames that come before as well
               as after it. So, in order for something to get through, it
               must be more powerful than something that comes in the next
               image (blocking) and more powerful than something that came
               before it (capture).

               Simple:  The simple model uses the basic attention guidance
                        map as the basis for blocking. This has the
                        advantage of simplicity, but has the drawback that
                        channels seem to act at different time scales and
                        interact in special ways.

               Complex: The complex model allows different channels to
                        dwell for longer or shorter time intervals. For
                        instance, it seems that color should have a strong,
                        short period of action, but that luminance and
                        orientations have a longer but less pronounced
                        dwell time. Also, orientation channels may have
                        orthogonal interactions.

    Stage Two: The second stage is an integrator. Images that come first in
               a series begin to assemble. If another image comes in with a
               set of similar features, it aids the first image's
               coherence. However, if the first image is more coherent, it
               will absorb the second image. As such, a frame can block a
               frame that follows at 100-300 ms by absorbing it; when this
               happens, the first image is enhanced by absorbing the
               second. The second stage emulates the reasonable expectation
               of visual flow in a sequence of images, but allows for
               non-fluid items to burst through.

    A per-pixel sketch of one plausible reading of these two stages is
    given after this class declaration.

    The AG is based on the outcome of our recent RSVP work as well as work
    by Sperling et al. (2001) and Chun and Potter (1995). */
class AttentionGate : public SimModule
{
public:
  // ######################################################################
  //! @name Constructor, destructor
  //@{

  //! Uninitialized constructor
  /*! The map will be resized and initialized the first time input() is
      called */
  AttentionGate(OptionManager& mgr,
                const std::string& descrName = "Attention Gate Map",
                const std::string& tagName = "AttentionGate",
                const nub::soft_ref<VisualCortex> vcx =
                nub::soft_ref<VisualCortex>());

  //! Destructor
  virtual ~AttentionGate();

  //@}

protected:
  //! Save our internals when saveResults() is called?
  OModelParam<bool> itsSaveResults;
  nub::soft_ref<VisualCortex> itsVCX;
  unsigned short itsSizeX;
  unsigned short itsSizeY;
  unsigned int itsFrameNumber;
  Image<PixRGB<float> > itsLastFrame;
  Image<PixRGB<float> > itsCurrFrame;
  float itsLogSigO; //!< log sigmoid offset parameter
  float itsLogSigS; //!< log sigmoid slope parameter

private:
  // forbid assignment and copy-construction:
  AttentionGate& operator=(const AttentionGate& sm);
  AttentionGate(const AttentionGate& sm);
};
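
// ######################################################################
// Illustrative sketch only -- not part of the toolkit API. The two
// hypothetical helpers below show, for a single pixel, one plausible
// reading of the two stages documented above. The constants c, leak and
// dt mirror the private members itsC, itsLeak and itsTimeStep of
// AttentionGateStd further down, but the actual update rules live in
// AttentionGate.C and may differ.
// ######################################################################

//! Stage one (sketch): survive only if stronger than temporal neighbors
inline float agStageOnePixelSketch(const float prev, const float curr,
                                   const float next)
{
  // a location gets through only if it is more powerful than the
  // corresponding input in the preceding frame (capture) and in the
  // following frame (blocking):
  const float competitor = (prev > next) ? prev : next;
  return (curr > competitor) ? (curr - competitor) : 0.0f;
}

//! Stage two (sketch): leaky integration of gated activity over frames
inline float agStageTwoPixelSketch(const float integ, const float gated,
                                   const float c, const float leak,
                                   const float dt)
{
  // input that is coherent across successive frames accumulates, while
  // unsupported activity leaks away:
  return integ + (dt / c) * (gated - leak * integ);
}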

// ######################################################################
//! AttentionGate configurator
// ######################################################################
class AttentionGateConfigurator : public ModelComponent
{
public:
  //! Constructor
  AttentionGateConfigurator(OptionManager& mgr,
                            const std::string& descrName =
                            "Attention Gate Configurator",
                            const std::string& tagName =
                            "AttentionGateConfigurator");

  //! Destructor
  virtual ~AttentionGateConfigurator();

  //! Get the chosen AG
  /*! You should call this during start() of the ModelComponent that
      needs the AG. */
  nub::ref<AttentionGate> getAG() const;

protected:
  OModelParam<std::string> itsAGtype; //!< type of gate

  //! Intercept people changing our ModelParam
  /*! See ModelComponent.H; as parsing the command-line or reading a
      config file sets our type, we'll also here instantiate a gate of
      the proper type (and export its options) */
  virtual void paramChanged(ModelParamBase* const param,
                            const bool valueChanged,
                            ParamClient::ChangeStatus* status);

private:
  nub::ref<AttentionGate> itsAG; // the gate
};
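
// ######################################################################
/* A minimal usage sketch (illustrative only; the surrounding program
   structure and the parseCommandLine() arguments are assumptions, not
   part of this header). It follows the usual INVT ModelManager
   conventions for configurator components:

     ModelManager manager("Attention Gate demo");

     nub::ref<AttentionGateConfigurator>
       agc(new AttentionGateConfigurator(manager));
     manager.addSubComponent(agc);

     if (manager.parseCommandLine(argc, argv, "", 0, 0) == false)
       return 1;

     manager.start();

     // once started, the command-line option has selected the concrete
     // AG type, so we can fetch it as documented above:
     nub::ref<AttentionGate> ag = agc->getAG();
*/
// ######################################################################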

// ######################################################################
//! The standard attention gate
// ######################################################################
/*! This is our current standard AG implementation. */
class AttentionGateStd : public AttentionGate
{
public:
  //! Uninitialized constructor
  AttentionGateStd(OptionManager& mgr, const std::string& descrName =
                   "Attention Gate Std",
                   const std::string& tagName =
                   "AttentionGateStd");

  //! Destructor
  virtual ~AttentionGateStd();

  //! Reset to initial state just after construction
  virtual void reset1();

protected:
  //! Callback for input frames
  SIMCALLBACK_DECLARE(AttentionGateStd, SimEventInputFrame);

  //! Callback for attention guidance map (AGM) output
  SIMCALLBACK_DECLARE(AttentionGateStd, SimEventAttentionGuidanceMapOutput);

  //! Callback for every time we should save our outputs
  SIMCALLBACK_DECLARE(AttentionGateStd, SimEventSaveOutput);

  //! Compute, for each object, the max surprise location and value
  /*! For each potential object, find the most surprising location and its
      value, and store the object's ID from segmentImageMC2. Later methods
      use these to extract features from the objects. */
  virtual void computeMinMaxXY(const Image<float>& attentionMap,
                               const Image<int>& segments,
                               const Image<bool>& candidates);

  //! Extract features from the surprising locations
  /*! For each segmented object, take the most surprising location and
      extract the features there. When done, store the objects and features
      in a deque; each element of the deque corresponds to one frame. */
  virtual void extractFeatureValues(SimEventQueue& q);

  //! Find the feature distance between objects
  /*! For each object in each frame, compute its feature distance from all
      other objects using a standard sum-of-squares distance,
      d(a,b) = sum_i (a_i - b_i)^2. */
  virtual void computeFeatureDistance();

  virtual Image<float> getLastAttentionMap() const;

  virtual Image<float> getCurrentAttentionMap() const;

  //! Return all our values as an Image<float>
  virtual Image<float> getValue(SimEventQueue& q);

private:
  //! The first stage is a true gate; it returns a mask of what gets through
  /*! The simple form does not treat channels differently and only uses the
      general final attention map over all channels. */
  void stageOneGateSimple(SimEventQueue& q);

  //! The first stage is a true gate; it returns a mask of what gets through
  /*! The complex form lets channels interact and can treat some channels
      at different time scales by using different leaky-integrator
      constants. */
  void stageOneGateComplex(SimEventQueue& q);

  //! The second stage is an integrator that melds features across frames
  void stageTwoGate(SimEventQueue& q);

  bool itsSegmentDone;

  //! types and parameters of the two stages
  OModelParam<std::string> itsAGStageOneType;
  OModelParam<std::string> itsAGStageTwoType;
  OModelParam<int> itsAGStageTwoEpochs;
  uint itsTotalEpochs;
  uint itsMaxStageTwoFrames;
  AG_METHODS_ENUM itsStageTwoGetFeatures;

  SimTime itsT; // time of last integration
  SimTime itsNewT;
  SimTime itsTimeStep;
  float itsC;
  float itsLeak;
  Image<float> itsLastAttentionMap; // contains both forward and backward effects
  Image<float> itsCurrentAttentionMap;
  Image<float> itsStageOneGate;
  Image<float> itsStageTwoGate;
  Image<float> itsInput;
  Image<int> itsStageTwoSegments;

  //! Stage two object location list, X positions
  std::vector<int> itsStageTwoObjectX;
  //! Stage two object location list, Y positions
  std::vector<int> itsStageTwoObjectY;
  //! Stage two object surprise values
  std::vector<float> itsStageTwoObjectVal;
  //! Stage two object ID list
  std::vector<int> itsStageTwoObjectID;
  //! Store the stage two objects over n frames
  std::deque<SimEventAttentionGateStageTwoObjects> itsStageTwoObjects;
  //! Object segmenter used in stage two
  segmentImageMC2<float,unsigned int,1> itsSegmenter;
};

// ######################################################################
//! The attention gate stub
// ######################################################################
/*! A do-nothing stub implementation of the AG. */
class AttentionGateStub : public AttentionGate
{
public:
  //! Uninitialized constructor
  AttentionGateStub(OptionManager& mgr, const std::string& descrName =
                    "Attention Gate Stub",
                    const std::string& tagName =
                    "AttentionGateStub");

  //! Destructor
  virtual ~AttentionGateStub();
};

#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */