/*!@file Surprise/SurpriseControl.H attempt to remove surprise from an image */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: T. Nathan Mundhenk <mundhenk@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Surprise/SurpriseControl.H $
// $Id: SurpriseControl.H 7063 2006-08-29 18:26:55Z rjpeters $
//

#ifndef SURPRISE_CONTROL_H_DEFINED
#define SURPRISE_CONTROL_H_DEFINED


#include "Image/Image.H"
#include "Image/MathOps.H"
#include "Image/ColorOps.H"
#include "Image/Pixels.H"
#include "Raster/Raster.H"
#include "Util/Assert.H"
#include "Util/Timer.H"

#include <fstream>
#include <iostream>
#include <math.h>
#include <string>
#include <vector>
#include <deque>

#define SC_NEAR_BLACK   0.01F
#define SC_LOW_SAT      0.01F
#define SMALL_SALIENCY  0.0F
#define SCALE_CHANNELS  6
#define SC_ORIENTS      4
#define SC_EXP          20
#define SC_MAX_CHANNELS 16
#define SC_DEBUG        false

using namespace std;

/** @name GlobalEnumerations
 * These variables and constant arrays help align the data and keep
 * everything consistent.
 */
//@{

//! channel types enumerated; must match SC_MAX_CHANNELS in count
enum sc_channels
{
  SC_DR0,
  SC_DR1,
  SC_DR2,
  SC_DR3,
  SC_GA0,
  SC_GA1,
  SC_GA2,
  SC_GA3,
  SC_IN,
  SC_FL,
  SC_RG,
  SC_BY,
  SC_H1,
  SC_H2,
  SC_HS,
  SC_HV
};

const std::string sc_channel_name[SC_MAX_CHANNELS] =
{
  "Direction 0",
  "Direction 1",
  "Direction 2",
  "Direction 3",
  "Orientation 0",
  "Orientation 1",
  "Orientation 2",
  "Orientation 3",
  "Intensity",
  "Flicker",
  "Red/Green",
  "Blue/Yellow",
  "H2SV H1",
  "H2SV H2",
  "H2SV S",
  "H2SV V"
};

const std::string sc_channel_name_abv[SC_MAX_CHANNELS] =
{
  "Dir_0",
  "Dir_1",
  "Dir_2",
  "Dir_3",
  "Ori_0",
  "Ori_1",
  "Ori_2",
  "Ori_3",
  "Intens",
  "Flicker",
  "RG",
  "BY",
  "H1",
  "H2",
  "HS",
  "HV"
};
//@}


/*************************************************************************/
//! Control surprise in a movie
template <class PIXTYPE, class BETATYPE, class FLOAT> class SurpriseControl
{
public:
  SurpriseControl(const ushort sizeX, const ushort sizeY);
  SurpriseControl();
  ~SurpriseControl();
  /** @name InputAndInit
   * These methods are used to init the object and input maps and variables
   * that affect the way SurpriseControl will work. These methods only need
   * to be called once (see the initialization sketch after this group).
   */
  //@{
  //! Init the basic variables
  void SCinit(const ushort sizeX, const ushort sizeY);
  //! set the master bias which is applied equally to all channels
  void SCsetMasterConspicBias(const FLOAT bias);
  //! use the max level from the conspicuity map rather than summing the levels
  void SCuseMaxLevel(const bool useML);
  //! Should we use the temporal component? default is yes
  void SCuseTemporal(const bool useTMP);
  //! normalize the biases with scale
  void SCnormalizeBiasWithScale(const bool useNBS);
  //! Tell us we are going to use this channel and set the bias
  void SCsetConspicBias(const FLOAT chan,
                        const int chan_enum);
  //! give me my scale if any
  void SCsetMyScale(const ushort scale);
  //! set the bias over a single axis
  void SCsetAxisBias(const FLOAT X, const FLOAT Y, const FLOAT Z);
  //! set the bias per H2SV channel on output
  void SCsetH2SVBias(const FLOAT H1, const FLOAT H2,
                     const FLOAT S, const FLOAT V);
  //! set the decay term over the beta map
  void SCsetLambda(const FLOAT lambda);
  //! set which frame if any is a target frame
  void SCsetTargetFrame(const uint frame);
  //! set how much of the original image will be combined back in
  void SCsetOriginalImageWeight(const FLOAT origImageBias);
  //! create the separable filters per image
  void SCcreateAndersonSepFilters(const ushort size);
  //! create the separable filters per image
  void SCcreateSepFilters(const FLOAT spatSigma,
                          const FLOAT tempSigma,
                          const FLOAT stdDevSize);
  //! pre-compute the end points and offsets used during convolution
  void SCfindConvolutionEndPoints();
  //@}

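  /* A minimal initialization sketch. The template arguments, image size and
     bias values below are illustrative assumptions only (PixH2SV2 / PixHyper
     as PIXTYPE / BETATYPE are not mandated by this header); the calls are the
     once-per-object methods declared in the group above.

       typedef SurpriseControl<PixH2SV2<float>,
                               PixHyper<float,SC_MAX_CHANNELS>, float> SC;
       SC sc(640, 480);                  // image size in X and Y
       sc.SCsetMasterConspicBias(1.0F);  // bias applied equally to all channels
       sc.SCsetConspicBias(1.0F, SC_IN); // enable and bias the intensity channel
       sc.SCcreateAndersonSepFilters(5); // build the separable x/y/z kernels
       sc.SCfindConvolutionEndPoints();  // pre-compute convolution offsets
  */
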
  /** @name InputPerFrame
   * These methods need to be fed data per frame, such as the saliency map
   * and the raw image for each frame. These should be called each frame.
   */
  //@{
  //! input the raw image frame for processing
  void SCinputRawImage(const Image<PixRGB<FLOAT> >& rawImage);
  //! input the saliency map for this frame
  void SCinputSalMap(const Image<FLOAT>& salMap);
  //! input a channel conspicuity map for this frame
  void SCinputConspicMap(const Image<FLOAT>& cmap, const int cmap_enum);
  //@}

  /** @name InputOptional
   * These methods allow optional masks or Bayes weight images to be
   * set
   */
  //@{
  //! Input a Bayes weight image to bias surprise reduction
  void SCinputBayesWeightImage(const Image<FLOAT> &bayesImage);
  //! Input an independent mask to control where we apply the filter
  void SCinputMaskImage(const Image<FLOAT> &maskImage);
  //@}

  /** @name RunPerFrame
   * Call SCprocessFrameSeperable to run each frame; it will call
   * SCcomputeNewBeta and SCseperateConv for you (see the per-frame sketch
   * after the Output group).
   */
  //@{
  //! compute new betaImage values
  void SCcomputeNewBeta();
  //! process a single frame of video using separable filters
  void SCprocessFrameSeperable();
  //! find the local channel biases
  void SCcomputeLocalBias();
  //! Helper method to call SCseperateConv over x, y and z
  void SCseperateConvXYZ();
  //! Helper method to call SCseperateConv over x, y but not z
  void SCseperateConvXY();
  //! process each axis by itself
  void SCseperateConv(const char axis);
  //@}

  /** @name Output
   * Output from each frame can be obtained by calling these methods
   */
  //@{
  //! get the alternative surprise-increased version of the image
  Image<PixRGB<FLOAT> > SCgetSharpened(const PIXTYPE scale_factor) const;
  //! process and return the final image
  Image<PixRGB<FLOAT> > SCgetFrame() const;
  //! process and return the filtered image
  Image<PixRGB<FLOAT> > SCgetOutImage() const;
  //! Return the temporal offset input image
  Image<PixRGB<FLOAT> > SCgetInImage() const;
  //! get the raw PIXTYPE output image
  Image<PIXTYPE> SCgetRawOutImage() const;
  //! get the input image converted as PIXTYPE
  Image<PIXTYPE> SCgetRawInImage() const;
  //! get the beta image of smoothed saliency maps used in the filter
  Image<BETATYPE> SCgetBetaImage() const;
  //! Get the temporal offset we are using
  ushort SCgetTemporalOffset() const;
  //! Is the output ready?
  bool SCisOutputReady() const;
  //! Get the local bias maps, proportional to their effect
  void SCgetLocalBiasImages(Image<FLOAT> &H1, Image<FLOAT> &H2,
                            Image<FLOAT> &S, Image<FLOAT> &V) const;
  //! Get the Y and Z parts of the separable filter
  void SCgetSeperableParts(Image<PIXTYPE> &Zimg, Image<PIXTYPE> &Yimg) const;

  //@}
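
  /* A minimal per-frame sketch, assuming an object 'sc' configured as in the
     initialization sketch above. The containers rgbFrame, salMap and
     intensMap are hypothetical placeholders for the caller's own per-frame
     data; the output may not be ready until the temporal buffer has filled.

       for (uint frame = 0; frame < numFrames; frame++)
       {
         sc.SCinputRawImage(rgbFrame[frame]);           // raw RGB frame
         sc.SCinputSalMap(salMap[frame]);               // saliency map
         sc.SCinputConspicMap(intensMap[frame], SC_IN); // one map per enabled channel
         sc.SCprocessFrameSeperable(); // calls SCcomputeNewBeta and SCseperateConv
         if (sc.SCisOutputReady())
         {
           const Image<PixRGB<float> > result = sc.SCgetFrame();
           // ... store or display 'result'
         }
       }
  */
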
private:
  /** @name FrameBuffers
   * These hold full frames to allow the temporal component to work.
   * The frame buffer holds raw frames while the beta image stores
   * the fully weighted surprise bias that should correspond with the
   * image
   */
  //@{
  //! The basic frame buffer, holds either smoothed or non-smoothed frames
  std::deque<Image<PIXTYPE> > itsFrameBuffer;
  //! Pointer to the current frame in the frame buffer
  Image<PIXTYPE> *itsFrameCurrent;
  //! Temporally smoothed surprise maps
  std::deque<Image<BETATYPE> > itsBetaImage;
  //! Pointer to the current frame in the beta image
  Image<BETATYPE> *itsBetaCurrent;
  //@}
  /** @name MiscImageHolders
   * These are images that hold a variety of intermediate images,
   * biases and input-output images
   */
  //@{
  //! intermediate processed image
  std::vector<Image<PIXTYPE> > itsInterImage;
  //! input maps of the post-surprise processed image, one per channel
  Image<FLOAT> itsConspicMap[SC_MAX_CHANNELS];
  //! Set of biases for each channel conspicuity map
  FLOAT itsConspicMapBias[SC_MAX_CHANNELS];
  //! are we using this channel? set by setting the bias
  bool itsUseConspicMap[SC_MAX_CHANNELS];
  //! The input for this frame
  Image<PIXTYPE> itsInImage;
  //! The output for this frame
  Image<PIXTYPE> itsOutImage;
  //! The final image after processing by the surprise map
  Image<PixRGB<FLOAT> > itsFinalImage;
  //@}

  /** @name BasicBiasImages
   * These are images that store masks and biases used to weight the
   * convolution operations. Per-channel weights are stored as a
   * BETATYPE image.
   */
  //@{
  //! the current saliency map
  Image<FLOAT> itsSalMap;
  //! Special independent mask image in addition to the surprise mask
  Image<FLOAT> itsMaskImage;
  //! input Bayesian bias image
  Image<FLOAT> itsBayesWeightImage;
  //! Local bias applied after accounting for channels
  Image<FLOAT> itsLocalBiasH1;
  //! Local bias applied after accounting for channels
  Image<FLOAT> itsLocalBiasH2;
  //! Local bias applied after accounting for channels
  Image<FLOAT> itsLocalBiasS;
  //! Local bias applied after accounting for channels
  Image<FLOAT> itsLocalBiasV;
  //@}

  /** @name ConvolutionGroup
   * These are private members for storing pointers and indexes that
   * provide a basis for convolution so that things such as offsets
   * and kernel placement do not have to be computed on the fly
   */
  //@{
  //! store the start point for this convolution in X
  Image<ushort> itsXStart;
  //! store the start point for this convolution in Y
  Image<ushort> itsYStart;
  //! store the start point for this convolution in Z
  Image<ushort> itsZStart;
  //! store the stop point for this convolution in X
  Image<ushort> itsXStop;
  //! store the stop point for this convolution in Y
  Image<ushort> itsYStop;
  //! store the stop point for this convolution in Z
  Image<ushort> itsZStop;
  //! store the kernel start point for this convolution in X
  Image<ushort> itsKXStart;
  //! store the kernel start point for this convolution in Y
  Image<ushort> itsKYStart;
  //! store the kernel start point for this convolution in Z
  Image<ushort> itsKZStart;
  //@}

  //! is true if saliency is very small at this location
  Image<bool> itsSmallSaliency;
  //! store messages that tell us if we did not init something
  std::vector<std::string> itsInitMessage;
  /** @name KernelParts
   * These are the three dimensions of a separable convolution kernel
   */
  //@{
  //! store the X part of the kernel
  std::vector<FLOAT> itsKalmanKernelX;
  //! store the Y part of the kernel
  std::vector<FLOAT> itsKalmanKernelY;
  //! store the Z part of the kernel
  std::vector<FLOAT> itsKalmanKernelZ;
  //@}

  //! flags that tell us if we did not init something
  std::vector<bool> itsInit;
  //! blank pixel used in run-time typing
  PixHyper<FLOAT,SC_MAX_CHANNELS> itsHyper;
  //! set to true when the frame buffer is full
  bool itsBufferFull;
  bool itsInitBuffer;
  /** @name ConstantBiases
   * These are the constant biases used to affect the whole process.
   * For instance, we can bias each of the x, y and z axes independently
   * if we want, or we can bias each channel.
   */
  //@{
  //! the channel biases
  FLOAT itsConBias[SC_MAX_CHANNELS];
  //! A master bias which is applied equally to all channels
  FLOAT itsMasterConspicBias;
  //! The decay term over beta
  FLOAT itsLambda;
  //! Bias over the X axis
  FLOAT itsXBias;
  //! Bias over the Y axis
  FLOAT itsYBias;
  //! Bias over the Z axis
  FLOAT itsZBias;
  //! Bias to H1
  FLOAT itsH1Bias;
  //! Bias to H2
  FLOAT itsH2Bias;
  //! Bias to S
  FLOAT itsSBias;
  //! Bias to V
  FLOAT itsVBias;
  //! how much of the original image should we add back
  FLOAT itsOriginalImageWeight;
  //@}

  /** @name ConstantSizesAndOffsets
   * These define constant sizes for the kernel and input image as well as
   * offsets to the target frame.
   */
  //@{
  //! What is the target frame if any
  uint itsTargetFrame;
  //! Internal iteration counter
  uint itsIterCounter;
  //! the size of the X kernel
  ushort itsKernelSizeX;
  //! the size of the Y kernel
  ushort itsKernelSizeY;
  //! the size of the Z kernel
  ushort itsKernelSizeZ;
  //! the size of the image in X
  ushort itsImageSizeX;
  //! the size of the image in Y
  ushort itsImageSizeY;
  //! the size of the image in Z (deque size)
  ushort itsImageSizeZ;
  //! offset for when we start the temporal component
  ushort itsTemporalOffset;
  //! My scale if known
  ushort itsScale;
  //@}

  //! Use max or combined surprise values
  bool itsUseMaxLevel;
  //! Use correlation matrix for biased surprise removal
  bool itsUseCorrMatrixSet;
  //! turn on Bayesian weighting of surprise reduction
  bool itsUseBayesWeightImage;
  //! use another independent mask image
  bool itsUseMaskImage;
  //! Set to true if you have a target frame
  bool itsUseTargetFrame;
  //! are we ready to give output
  bool itsOutputReady;
  //! Should we use the temporal component or not?
  bool itsUseTemporal;
  //! Should we normalize the H1, H2, S and V biases with scale?
  bool itsNormalizeBiasWithScale;
};
#endif