/*!@file Beobot/BeobotBrainMT.H efficient multi-threaded computation of the
  feature pyramids, saliency map, gist features, and shape estimator
  for the Beobot                                                         */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Christian Siagian <siagian@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Beobot/BeobotBrainMT.H $
// $Id: BeobotBrainMT.H 12127 2009-12-06 03:05:23Z siagian $
//

// ######################################################################

#ifndef BEOBOT_BEOBOTBRAINMT_H_DEFINED
#define BEOBOT_BEOBOTBRAINMT_H_DEFINED

// saliency map scale params
// we start at level_min = 1 (not 2)
// because the image is already scaled to 160x120
// (half the usual dimension)
#define delta_min 3
#define delta_max 4
#define level_min 1
#define level_max 3
#define maxdepth  (level_max + delta_max + 1)
#define normtyp   (VCXNORM_MAXNORM)

// designated pyramid level of saliency map
#define sml 2
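
// Under the usual center-surround scheme, each feature map is taken
// between a center pyramid level c in [level_min .. level_max] and a
// surround level s = c + delta with delta in [delta_min .. delta_max],
// i.e. 3 centers x 2 deltas = 6 maps per sub-channel; the deepest level
// needed is level_max + delta_max = 7, hence maxdepth = 8 levels (0..7).
// With sml = 2 and a 160x120 input, the saliency map is 40x30.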

// image size vars: FIX: CAN'T BE STATIC LIKE THIS
#define IMAGE_WIDTH  160
#define IMAGE_HEIGHT 120

// relative feature weights:
#define IWEIGHT 0.7
#define CWEIGHT 1.0
#define OWEIGHT 1.0

// channel index definitions: CHANGE TO ENUM? FIX?
#define NUM_CHANNELS 7
#define REDGREEN     0
#define BLUEYELLOW   1
#define INTENSITY    2
#define ORI0         3
#define ORI45        4
#define ORI90        5
#define ORI135       6

// number of worker threads
#define numBBMTthreads 7

#include "Component/ModelComponent.H"
#include "Component/ModelParam.H"
#include "Neuro/VisualCortex.H"
#include "Image/Image.H"
#include "Image/ImageSet.H"
#include "Image/ColorOps.H"
#include "Image/PyramidOps.H"
#include "Image/Pixels.H"
#include "GUI/XWinManaged.H"
#include "Raster/Raster.H"
#include "Util/Timer.H"

#include <list>
#include <pthread.h>


// ######################################################################
//! A single entry in the worker job queue:
//! one conspicuity-map computation (channel, pyramid type, weight, orientation)
struct jobData
{
  jobData() { };

  jobData(const int jt, const PyramidType pt, const float w,
          const float ori = 0.0F) :
    jobType(jt), ptyp(pt), weight(w), orientation(ori) { }

  int jobType;
  PyramidType ptyp;
  float weight;
  float orientation;
};
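
// For illustration only (the actual job list is built in BeobotBrainMT.C,
// so the particular PyramidType values used there are an assumption here):
// a job for the intensity conspicuity map could be described as
//   jobData(INTENSITY, Gaussian5, IWEIGHT)
// and one for the 45-degree orientation channel as
//   jobData(ORI45, Oriented5, OWEIGHT, 45.0F)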

// ######################################################################
//! Brain of a Beobot
//! Note: this class is not derived from Brain;
//! it is used specifically for the Beobot system.
class BeobotBrainMT : public ModelComponent
{
public:

  // ######################################################################
  /*! @name Constructors and Destructors */
  //@{

  //! Constructor
  BeobotBrainMT(OptionManager& mgr,
                const std::string& descrName = "Beobot Brain MT",
                const std::string& tagName = "BeobotBrainMT");

  //! Destructor
  virtual ~BeobotBrainMT();

  //@}

  // ######################################################################
  //! @name Access functions
  //@{

  //! get the winning channel for salient point i
  inline int getWinChan(uint i);

  //! get the winning submap number for salient point i
  inline int getWinSubmapNum(uint i);

  //! get the last input image
  inline Image< PixRGB<byte> > getCurrImage();

  //! get the current conspicuity map of channel cnum
  inline Image<float> getCurrCMap(uint cnum);

  //! get the salient point for object i
  inline Point2D<int> getSalPoint(uint i);

  //! get the pre-attentive feature vector at the passed-in coordinate
  void getFeatures(Point2D<int> p, std::vector<double>& v);

  //! get the bounding box for object i
  inline Rectangle getObjRect(uint i);

  //! get the salient features for object i
  inline void getSalientFeatures(uint i, std::vector<double>& v);

  //! get the time it took to produce the gist and saliency features
  inline float getProcessTime();

  //! get the last computed gist vector
  Image<double> getGist();

  //! get the last computed saliency map
  Image<float> getSalMap();

  //@}

  // ######################################################################
  /*! @name member functions */
  //@{

  //! set up the image to be processed and build the job queue
  void input(const Image< PixRGB<byte> > img);

  //! is the last given input done processing?
  bool outputReady();

  //! for internal thread use: compute a conspicuity map from an image
  void threadCompute();

  //! get the number of salient objects found in the current image
  int getNumSalPoint();

  //@}

protected:
  void start1(); //!< get started
  void stop2();  //!< get stopped

private:

  //! compute the Color, Intensity, and Orientation (CIO) pyramids
  void computeCIOpyr(Image< PixRGB<byte> > img);

  //! find all the objects in the image
  void findObjects();

  //! compute the salient feature vectors
  void computeSalientFeatures();

  //! scale the winner coordinates down to the target dimensions
  Point2D<int> downScaleCoords(Point2D<int> winner, Dims targetDims);

  //! get the winning map for shape estimation
  bool setWinningMap(Point2D<int> winner);

  //! return a binary mask (0 and 255) of the attended object
  //! at saliency map scale
  Image<byte> getObjectMask();

  //! get the shape estimator bounding box
  Rectangle getSEBoundingBox();

  //! correct the object's bounding box for SIFT recognition
  Rectangle correctBB(Rectangle r, Point2D<int> locn, int w, int h);

  //! display intermediate maps
  //void display();

  Image< PixRGB<byte> > itsCurrImg;    //!< current color image
  int itsCurrImgWidth;
  int itsCurrImgHeight;
  Image<float> itsCurrLumImg;          //!< current luminance image
  Image<byte> r, g, b, y;              //!< current RGBY images
  Image<float> rgimg;
  Image<float> byimg;
  Image<float> itsSalmap;              //!< output saliency map
  bool gotLum, gotRGBY;                //!< luminance/RGBY images computed flags

  //! the number of channels we emulate
  uint itsNumChannels;

  //! the channel weights
  std::vector<float> itsChanWeight;

  //! conspicuity maps
  std::vector<Image<float> > itsCMaps;

  //! center-surround maps stored before normalization
  //! (used for feature comparison)
  std::vector<std::vector<Image<float> > > itsRawCSMaps;

  //! center-surround maps stored after normalization
  //! (used for the saliency map)
  std::vector<std::vector<Image<float> > > itsCSMaps;

  //! feature map (pyramid) storage
  std::vector<ImageSet<float> > itsImgPyrs;

  //! gist feature vector
  Image<double> itsGistVector;

  //! total number of gist features
  uint itsGistFeatSize;

  //! the list of winning points
  std::vector<Point2D<int> > itsWinner;

  //! the current list of segmented object regions
  std::vector<Rectangle> itsObjRect;

  //! salient feature vectors
  std::vector<std::vector<double> > itsSalientFeatures;

  //! index of the winning channel
  std::vector<uint> itsWinningChan;

  //! index of the winning submap of the winning channel
  std::vector<uint> itsWinningSubmapNum;

  //! a copy of the winning map
  std::vector<Image<float> > itsWinningMap;

  //! the last computed Shape Estimator masks:
  //! binary masks {0,255} in saliency map coordinates
  //! specifying the extent of each object
  std::vector<Image<byte> > itsObjectMask;

  //! the structuring element for eroding and dilating
  //! for the chamfer smoothing method
  Image<byte> structEl;

  //! job queue and number of jobs to do
  std::list<jobData> jobQueue;
  uint jobsTodo;

  rutz::shared_ptr<Timer> itsTimer;
  float itsProcessTime;                //!< time to compute gist and saliency

  //! thread bookkeeping
  pthread_t *worker;
  pthread_mutex_t jobLock;             //!< locking jobQueue
  pthread_mutex_t mapLock;             //!< locking the related saliency maps
  pthread_mutex_t gistLock;            //!< locking gist vectors
  pthread_cond_t jobCond;
  uint numWorkers;

  XWinManaged* itsWin;
};
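
// ######################################################################
// Typical usage (an illustrative sketch only, assuming bbmt is a started
// BeobotBrainMT, e.g. held in a nub::ref<BeobotBrainMT> registered with
// the application's ModelManager; frame grabbing and error handling are
// omitted):
//
//   bbmt->input(img);                          // hand a new frame to the workers
//   while (!bbmt->outputReady()) usleep(1000); // wait for all jobs to finish
//   Image<float>  salmap = bbmt->getSalMap();  // saliency map (at level sml)
//   Image<double> gist   = bbmt->getGist();    // gist feature vector
//   for (int i = 0; i < bbmt->getNumSalPoint(); i++)
//     {
//       Point2D<int> p = bbmt->getSalPoint(i); // salient point i
//       Rectangle    r = bbmt->getObjRect(i);  // its bounding box
//     }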

// ######################################################################
// Implementation for BeobotBrainMT inline functions
// ######################################################################
inline Image< PixRGB<byte> > BeobotBrainMT::getCurrImage()
{ return itsCurrImg; }

inline int BeobotBrainMT::getWinChan(uint i)
{ ASSERT(i < itsWinningChan.size()); return itsWinningChan[i]; }

inline int BeobotBrainMT::getWinSubmapNum(uint i)
{ ASSERT(i < itsWinningSubmapNum.size()); return itsWinningSubmapNum[i]; }

inline Image<float> BeobotBrainMT::getCurrCMap(uint i)
{ ASSERT(i < itsCMaps.size()); return itsCMaps[i]; }

inline int BeobotBrainMT::getNumSalPoint()
{ return itsWinner.size(); }

inline Point2D<int> BeobotBrainMT::getSalPoint(uint i)
{ ASSERT(i < itsWinner.size()); return itsWinner[i]; }

inline Rectangle BeobotBrainMT::getObjRect(uint i)
{ ASSERT(i < itsObjRect.size()); return itsObjRect[i]; }

inline void BeobotBrainMT::getSalientFeatures(uint i, std::vector<double>& v)
{
  ASSERT(i < itsSalientFeatures.size());
  v.clear();
  for(uint j = 0; j < itsSalientFeatures[i].size(); j++)
    v.push_back(itsSalientFeatures[i][j]);
}

inline float BeobotBrainMT::getProcessTime()
{ return itsProcessTime; }

#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */