00001 /*!@file Neuro/ScaleSurpriseControl.H attempt to remove surprise from image */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. 
// 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: T. Nathan Mundhenk <mundhenk@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/ScaleSurpriseControl.H $ 00035 // $Id: ScaleSurpriseControl.H 10772 2009-02-05 17:28:49Z itti $ 00036 // 00037 00038 #ifndef SCALE_SURPRISE_CONTROL_H_DEFINED 00039 #define SCALE_SURPRISE_CONTROL_H_DEFINED 00040 00041 #include "Surprise/SurpriseControl.H" 00042 00043 #include "Channels/BlueYellowChannel.H" 00044 #include "Channels/ChannelBase.H" 00045 #include "Channels/ColorChannel.H" 00046 #include "Channels/DirectionChannel.H" 00047 #include "Channels/FlickerChannel.H" 00048 #include "Channels/GaborChannel.H" 00049 #include "Channels/H2SVChannel.H" 00050 #include "Channels/Hue1Channel.H" 00051 #include "Channels/Hue2Channel.H" 00052 #include "Channels/IntensityChannel.H" 00053 #include "Channels/MotionChannel.H" 00054 #include "Channels/OrientationChannel.H" 00055 #include "Channels/RedGreenChannel.H" 00056 #include "Channels/SaturationChannel.H" 00057 #include "Channels/ValueIntensityChannel.H" 00058 #include "Component/OptionManager.H" 00059 #include "Image/MathOps.H" 00060 #include "Image/ShapeOps.H" 00061 #include "Image/ColorOps.H" 00062 #include "Image/Normalize.H" 00063 #include "Neuro/Brain.H" 00064 #include "Neuro/SaliencyMap.H" 00065 #include "Neuro/VisualCortex.H" 00066 #include "Transport/FrameInfo.H" 00067 #include "Transport/FrameOstream.H" 00068 #include "Util/readConfig.H" 00069 #include "rutz/shared_ptr.h" 00070 00071 00072 00073 #define PIX_H2SV_TYPE PixH2SV2 00074 00075 using namespace std; 00076 //! 
//! Remove surprise using scales; FLOAT is either float or double for precision
/*! ScaleSurpriseControl drives one SurpriseControl per pyramid scale
    (itsSurpriseControl) and combines their outputs to attenuate surprising
    regions of an input frame. Feed it a raw frame and a saliency map each
    frame, call SSCprocessFrame, then read results via the SSCget* methods. */
template <class FLOAT> class ScaleSurpriseControl
{
public:
  //! default constructor, call with base image size for frames
  ScaleSurpriseControl(const ushort sizeX,
                       const ushort sizeY,
                       const string confFile = "null");
  //! default constructor; need to call SSCinit and SSCreadConfig afterwards
  ScaleSurpriseControl();
  //! default destructor
  ~ScaleSurpriseControl();

  /** @name InputAndInit
   * These methods are used to init the object and input maps and variables
   * that affect the way ScaleSurpriseControl will work. These methods only
   * need to be called once
   */
  //@{
  //! Input LevelSpec info directly instead of from a config file
  void SSCsetLevelSpecInfo(const uint levMin,   const uint levMax,
                           const uint delMin,   const uint delMax,
                           const uint mapLevel, const uint maxIndex,
                           const uint maxDepth);
  //! read in config values from confFile
  void SSCreadConfig(const string confFile);
  //! is called by default constructor; sets up internal buffers and scales
  void SSCinit(const ushort sizeX, const ushort sizeY);
  //@}

  /** @name InputPerFrame
   * These methods need to be fed data per frame such as the saliency map
   * and the raw image for each frame. These should be called each frame.
   */
  //@{
  //! input a raw frame for the current frame
  void SSCinputRawImage(const Image<PixRGB<FLOAT> >& rawImage);
  //! input the base saliency map for this frame
  void SSCinputSalMap(const Image<FLOAT>& salMap);
  //@}

  /** @name InputOptional
   * These methods allow optional masks or bayes weight images to be
   * set
   */
  //@{
  //! input an independent mask image if desired
  void SSCinputMaskImage(const Image<FLOAT>& maskImage);
  //! input a bayes weight image if desired
  void SSCinputBayesWeightImage(const Image<FLOAT>& bayesImage);
  //@}

  /** @name RunPerFrame
   * Call SSCprocessFrame to run each frame. It should call
   * each SurpriseControl at each scale for you.
   */
  //@{
  //! process this movie frame using a brain
  void SSCprocessFrame(Brain* brain);
  //! process this movie frame, no brain — NO-OP overload
  void SSCprocessFrame(const uint frame);
  //@}

  /** @name Output
   * Output from each frame can be obtained by calling these methods
   */
  //@{
  //! get the resulting (surprise-attenuated) frame
  Image<PixRGB<FLOAT> > SSCgetFrame() const;
  //! get the difference image between the input and final output
  Image<PixRGB<FLOAT> > SSCgetDiffImage(
                                const bool normalize = false) const;
  //! compute per-scale difference parts for this image over rawImage
  std::vector<Image<PixRGB<FLOAT> > > SSCgetDiffParts() const;
  //! compute the combined beta map for all scales
  std::vector<Image<FLOAT> >          SSCgetBetaParts(
                                const bool normalize = false) const;
  //! return the per-channel (H1, H2, S, V) bias images used in smoothing etc.
  void SSCgetBiasParts(std::vector<Image<PixRGB<FLOAT> > > &H1,
                       std::vector<Image<PixRGB<FLOAT> > > &H2,
                       std::vector<Image<PixRGB<FLOAT> > > &S,
                       std::vector<Image<PixRGB<FLOAT> > > &V) const;
  //! Get the Y and Z parts of the separable filter
  void SSCgetSeperableParts(std::vector<Image<PixRGB<FLOAT> > > &Zimgs,
                            std::vector<Image<PixRGB<FLOAT> > > &Yimgs,
                            const bool normalize = false) const;
  //@}

private:
  //! Internal frame counter
  unsigned long itsFrameCounter;
  //! Vector of surprise removers, one per pyramid scale
  std::vector<SurpriseControl<PIX_H2SV_TYPE<FLOAT>,
                              PixHyper<FLOAT,SC_MAX_CHANNELS>,FLOAT> >
                                                       itsSurpriseControl;
  //! base for readConfig
  readConfig                           itsReadConfig;
  //! result image per scale
  std::vector<Image<PixRGB<FLOAT> > >  itsResultImages;
  //! bias to apply to each scale
  std::vector<FLOAT>                   itsScaleBias;
  //! power applied to the filters at each scale
  std::vector<FLOAT>                   itsScalePower;
  //! store reverse-computed filter sizes Z
  std::vector<FLOAT>                   itsFilterSizesZ;
  //! store the actual pyramid image sizes (X), reverse computed
  std::vector<ushort>                  itsImageSizesX;
  //! store the actual pyramid image sizes (Y), reverse computed
  std::vector<ushort>                  itsImageSizesY;
  //! store reverse-computed filter sizes X
  std::vector<ushort>                  itsFilterSizesX;
  //! store reverse-computed filter sizes Y
  std::vector<ushort>                  itsFilterSizesY;
  //! the raw input image
  Image<PixRGB<FLOAT> >                itsRawImage;
  //! the final (output) image
  Image<PixRGB<FLOAT> >                itsFinalImage;
  //! stored salmap frame
  Image<FLOAT>                         itsSalMap;
  //! a bayes weight image if desired
  Image<FLOAT>                         itsBayesWeightImage;
  //! an independent mask if desired
  Image<FLOAT>                         itsMaskImage;
  //! bias to apply to each channel's conspicuity map
  FLOAT  itsConspicMapBias[SC_MAX_CHANNELS];
  //! bias to apply to X axis convolution
  FLOAT  itsAxisBiasX;
  //! bias to apply to Y axis convolution
  FLOAT  itsAxisBiasY;
  //! bias to apply to Z (temporal) axis convolution
  FLOAT  itsAxisBiasZ;
  //! Lambda to smooth reaction of filters over time
  FLOAT  itsLambda;
  //! How big should we convolve, in standard deviations
  FLOAT  itsStdSize;
  //! What should be the size of the temporal filter
  FLOAT  itsZSigma;
  //! Bias to H1 component
  FLOAT  itsH1Bias;
  //! Bias to H2 component
  FLOAT  itsH2Bias;
  //! Bias to S (saturation) component
  FLOAT  itsSBias;
  //! Bias to V (value) component
  FLOAT  itsVBias;
  //! Master bias over all conspicuity maps
  FLOAT  itsMasterConspicBias;
  //! how much of the original image should we add back
  FLOAT  itsOriginalImageWeight;
  //! If we sharpen the image, what factor should we use for H1?
  FLOAT  itsSharpFactorH1;
  //! If we sharpen the image, what factor should we use for H2?
  FLOAT  itsSharpFactorH2;
  //! If we sharpen the image, what factor should we use for S?
  FLOAT  itsSharpFactorS;
  //! If we sharpen the image, what factor should we use for V?
  FLOAT  itsSharpFactorV;
  //! base size of the filters used in the image pyramid e.g. 5 or 9
  ushort itsBaseFilterSize;
  //! LevelSpec LevMin
  ushort itsLevMin;
  //! LevelSpec LevMax
  ushort itsLevMax;
  //! LevelSpec DelMin
  ushort itsDelMin;
  //! LevelSpec DelMax
  ushort itsDelMax;
  //! LevelSpec itsMapLevel
  ushort itsMapLevel;
  //! LevelSpec itsMaxIndex
  ushort itsMaxIndex;
  //! LevelSpec itsMaxDepth
  ushort itsMaxDepth;
  //! This image's size X
  ushort itsImageSizeX;
  //! This image's size Y
  ushort itsImageSizeY;
  //! This image's size X at first pyramid level
  ushort itsImageBaseX;
  //! This image's size Y at first pyramid level
  ushort itsImageBaseY;
  //! The filter size X in pyramid
  ushort itsFilterSizeX;
  //! The filter size Y in pyramid
  ushort itsFilterSizeY;
  //! should we use the max level and not sum for conspicuity maps?
  bool   itsUseMaxLevel;
  //! Have we set the levelspec?
  bool   itsLevelSpecSet;
  //! should we get a reduced or sharpened image?
  bool   itsGetReduced;
  //! Use an emulation of Anderson's separable filter for the kernel
  bool   itsUseAndersonSeperable;
  //! Should we use the temporal component when smoothing out surprise
  bool   itsUseTemporal;
  //! Should we normalize the H1, H2, S and V bias with scale?
  bool   itsNormalizeBiasWithScale;
};


#endif