/*!@file Surprise/ScaleRemoveSurprise.H attempt to remove surprise from an image */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: T. Nathan Mundhenk <mundhenk@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Surprise/ScaleRemoveSurprise.H $
// $Id: ScaleRemoveSurprise.H 6795 2006-06-29 20:45:32Z rjpeters $
//

#ifndef SCALE_REMOVE_SURPRISE_H_DEFINED
#define SCALE_REMOVE_SURPRISE_H_DEFINED

#include "Surprise/RemoveSurprise.H"
#include "Util/readConfig.H"
#include "Image/MathOps.H"

#define PIX_H2SV_TYPE PixH2SV1

using namespace std;

//! remove surprise using scales; FLOAT is either float or double for precision
template <class FLOAT> class ScaleRemoveSurprise
{
public:
  //! default constructor, call with the base image size for frames
  ScaleRemoveSurprise(const ushort sizeX,
                      const ushort sizeY,
                      const string confFile = "null");
  //! default destructor
  ~ScaleRemoveSurprise();
  //! called by the default constructor; sets everything up
  void SRSinit(const ushort sizeX,
               const ushort sizeY);
  //! input a raw frame, giving the frame number as well
  void SRSinputRawImage(const Image<PixRGB<FLOAT> >& rawImage,
                        const uint frame);
  //! input the base saliency map for this frame
  void SRSinputSalMap(const Image<FLOAT>& salMap);
input an "anti" image set that reduce the process for target features 00067 void SRSsetAntiWeights(); 00068 //! input an "anti" image set that reduce the process for target features 00069 void SRSsetAntiWeightsInteract(const uint aframes, 00070 const uint bframes); 00071 //! set up initial bayes weights for biasing from features 00072 void SRScomputeBayesFeatureBias(const uint frames, 00073 const string baseFileNamePrefix, 00074 const string antiFileNamePrefix); 00075 //! Open a bayes feature bias already computed 00076 void SRSopenBayesFeatureBias(const string baseFileNamePrefix, 00077 const string antiFileNamePrefix); 00078 //! Find weights for biasing from features for current image 00079 void SRScomputeBayesFeatureCurrent(const uint frame, 00080 const string fileNamePrefix); 00081 //! process this movie frame 00082 void SRSprocessFrame(); 00083 //! get the resulting frame processed 00084 Image<PixRGB<FLOAT> > SRSgetFrame() const; 00085 //! get the difference image between the input and final output 00086 Image<PixRGB<FLOAT> > SRSgetDiffImage() const; 00087 //! compute difference difference parts for this image over rawImage 00088 std::vector<Image<PixRGB<FLOAT> > > SRSgetDiffParts() const; 00089 //! compute the combined beta map for all scales 00090 std::vector<Image<FLOAT> > SRSgetBetaParts() const; 00091 private: 00092 //! Vector of surprise removers 00093 std::vector<RemoveSurprise<PIX_H2SV_TYPE<FLOAT>,PixHyper<FLOAT,6>,FLOAT> > 00094 itsRemoveSurprise; 00095 //! base for readConfig 00096 readConfig itsReadConfig; 00097 //! result image per scale 00098 std::vector<Image<PixRGB<FLOAT> > > itsResultImages; 00099 //! bias to apply to each scale 00100 std::vector<FLOAT> itsScaleBias; 00101 //! Power to the filters at each scale 00102 std::vector<FLOAT> itsScalePower; 00103 //! how much to desaturate surprising color 00104 std::vector<FLOAT> itsDesatBias; 00105 //! store reverse computed filter sizez Z 00106 std::vector<FLOAT> itsFilterSizesZ; 00107 //! store the actual pyramid image sizes reverse computed 00108 std::vector<ushort> itsImageSizesX; 00109 //! store the actual pyramid image sizes reverse computed 00110 std::vector<ushort> itsImageSizesY; 00111 //! store reverse computed filter sizez X 00112 std::vector<ushort> itsFilterSizesX; 00113 //! store reverse computed filter sizez Y 00114 std::vector<ushort> itsFilterSizesY; 00115 //! the raw input image 00116 Image<PixRGB<FLOAT> > itsRawImage; 00117 //! the final image 00118 Image<PixRGB<FLOAT> > itsFinalImage; 00119 //! stored salmap frame 00120 Image<FLOAT> itsSalMap; 00121 //! base correlation image 00122 Image<FLOAT> itsBaseCorr; 00123 //! compute R from Corr for display purposes 00124 Image<FLOAT> itsBaseR; 00125 //! base image mean 00126 Image<FLOAT> itsBaseMean; 00127 //! base image STD 00128 Image<FLOAT> itsBaseSTD; 00129 //! base image SS 00130 Image<FLOAT> itsBaseSS; 00131 //! base likelyhood image 00132 Image<FLOAT> itsBaseLikelyhood; 00133 //! Non normalized likelyhood 00134 Image<FLOAT> itsNonNormalizedBaseL; 00135 //! anti correlation image 00136 Image<FLOAT> itsAntiCorr; 00137 //! compute R from Corr for display purposes 00138 Image<FLOAT> itsAntiR; 00139 //! anti image mean 00140 Image<FLOAT> itsAntiMean; 00141 //! anti image STD 00142 Image<FLOAT> itsAntiSTD; 00143 //! anti image SS 00144 Image<FLOAT> itsAntiSS; 00145 //! anti likelyhood image 00146 Image<FLOAT> itsAntiLikelyhood; 00147 //! Non normalized likelyhood 00148 Image<FLOAT> itsNonNormalizedAntiL; 00149 //! 
  //! Bayesian P image between the anti image, the base image, and the input
  Image<FLOAT> itsBayesImage;
  //! augmented Bayes image for beliefs
  Image<FLOAT> itsBeliefImage;
  //! bias to apply to X axis convolution
  FLOAT itsAxisBiasX;
  //! bias to apply to Y axis convolution
  FLOAT itsAxisBiasY;
  //! bias to apply to Z axis convolution
  FLOAT itsAxisBiasZ;
  //! bias to apply to the intensity channel
  FLOAT itsINBias;
  //! bias to apply to the direction channel
  FLOAT itsDRBias;
  //! bias to apply to the flicker channel
  FLOAT itsFLBias;
  //! bias to apply to the gaussian channel
  FLOAT itsGABias;
  //! bias to apply to the red/green channel
  FLOAT itsRGBias;
  //! bias to apply to the blue/yellow channel
  FLOAT itsBYBias;
  //! lambda to smooth the reaction of the filters over time
  FLOAT itsLambda;
  //! how large the convolution should be, in standard deviations
  FLOAT itsStdSize;
  //! the size of the temporal filter
  FLOAT itsZSigma;
  //! bias to H1
  FLOAT itsH1bias;
  //! bias to H2
  FLOAT itsH2bias;
  //! bias to S
  FLOAT itsSbias;
  //! bias to V
  FLOAT itsVbias;
  //! the current video frame we are looking at
  uint itsFrame;
  //! base image N
  uint itsBaseN;
  //! anti image N
  uint itsAntiN;
  //! base size of the filters used in the image pyramid, e.g. 5 or 9
  ushort itsBaseFilterSize;
  //! LevelSpec LevMin
  ushort itsLevMin;
  //! LevelSpec LevMax
  ushort itsLevMax;
  //! LevelSpec DelMin
  ushort itsDelMin;
  //! LevelSpec DelMax
  ushort itsDelMax;
  //! LevelSpec itsMapLevel
  ushort itsMapLevel;
  //! LevelSpec itsMaxIndex
  ushort itsMaxIndex;
  //! LevelSpec itsMaxDepth
  ushort itsMaxDepth;
  //! this image's size X
  ushort itsImageSizeX;
  //! this image's size Y
  ushort itsImageSizeY;
  //! this image's size X at the first pyramid level
  ushort itsImageBaseX;
  //! this image's size Y at the first pyramid level
  ushort itsImageBaseY;
  //! the filter size X in the pyramid
  ushort itsFilterSizeX;
  //! the filter size Y in the pyramid
  ushort itsFilterSizeY;
  //! should we use a true Kalman filter?
  bool itsUseKalman;
  //! should we use the max level rather than the sum for conspicuity maps?
  bool itsUseMaxLevel;
};


#endif
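
// ######################################################################
// A minimal usage sketch (not part of the original header). The frame
// and saliency-map inputs are assumed to be provided by the caller, and
// the config file name is a hypothetical example; only the
// ScaleRemoveSurprise calls come from the interface declared above.
//
//   #include "Surprise/ScaleRemoveSurprise.H"
//
//   void removeSurprise(const std::vector<Image<PixRGB<float> > >& frames,
//                       const std::vector<Image<float> >&          salMaps)
//   {
//     // construct with the base frame size and an optional config file
//     ScaleRemoveSurprise<float> srs(frames[0].getWidth(),
//                                    frames[0].getHeight(),
//                                    "removeSurprise.conf");
//     for(uint i = 0; i < frames.size(); i++)
//     {
//       srs.SRSinputRawImage(frames[i], i); // raw frame plus frame number
//       srs.SRSinputSalMap(salMaps[i]);     // matching saliency map
//       srs.SRSprocessFrame();              // run surprise removal
//       const Image<PixRGB<float> > out  = srs.SRSgetFrame();
//       const Image<PixRGB<float> > diff = srs.SRSgetDiffImage();
//       // ... write or display 'out' and 'diff' ...
//     }
//   }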
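
// ######################################################################
// A sketch of the Bayes feature-bias workflow (also not part of the
// original header). The file-name prefixes and frame count below are
// hypothetical placeholders; one reading of the interface is that the
// bias is either computed from stored base/anti feature files or
// re-opened from an earlier run, then applied per frame.
//
//   ScaleRemoveSurprise<float> srs(640, 480);
//
//   // compute the bias from 30 stored base/anti feature frames ...
//   srs.SRScomputeBayesFeatureBias(30, "base.", "anti.");
//   // ... or re-open a bias computed in a previous run
//   // srs.SRSopenBayesFeatureBias("base.", "anti.");
//
//   // then bias the features of the current frame before processing it
//   srs.SRScomputeBayesFeatureCurrent(0, "frame.");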