SurpriseControl.H

Go to the documentation of this file.
00001 /*!@file Surprise/SurpriseControl.H attempt to remove surprise from image */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: T. Nathan Mundhenk <mundhenk@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Surprise/SurpriseControl.H $
00035 // $Id: SurpriseControl.H 7063 2006-08-29 18:26:55Z rjpeters $
00036 //
00037 
00038 #ifndef SURPRISE_CONTROL_H_DEFINED
00039 #define SURPRISE_CONTROL_H_DEFINED
00040 
00041 
00042 #include "Image/Image.H"
00043 #include "Image/MathOps.H"
00044 #include "Image/ColorOps.H"
00045 #include "Image/Pixels.H"
00046 #include "Raster/Raster.H"
00047 #include "Util/Assert.H"
00048 #include "Util/Timer.H"
00049 
00050 #include <fstream>
00051 #include <iostream>
00052 #include <math.h>
00053 #include <string>
00054 #include <vector>
00055 #include <deque>
00056 
      // Compile-time constants for SurpriseControl. Only SC_MAX_CHANNELS is used
      // in this header; the remaining constants are consumed elsewhere, so the
      // notes on them are assumptions drawn from their names - TODO confirm.
      // presumably the level below which a pixel is treated as black
00057 #define SC_NEAR_BLACK   0.01F
      // presumably the level below which a pixel is treated as unsaturated
00058 #define SC_LOW_SAT      0.01F
      // presumably the threshold behind the "small saliency" flag image
00059 #define SMALL_SALIENCY  0.0F
      // presumably the number of scales per channel
00060 #define SCALE_CHANNELS  6
      // presumably the number of orientations; the channel enumeration below has
      // four direction and four orientation entries
00061 #define SC_ORIENTS      4
      // NOTE(review): purpose not visible from this header
00062 #define SC_EXP          20
      // Number of channel slots: sizes the channel name tables and the
      // per-channel member arrays declared in this header
00063 #define SC_MAX_CHANNELS 16
      // presumably toggles debugging output in the implementation
00064 #define SC_DEBUG        false
00065 
// NOTE(review): a using-directive at header scope is injected into every file
// that includes this header. The declarations in this header already spell
// out std:: explicitly; confirm that no includer depends on this directive
// before removing it.
00066 using namespace std;
00067 
00068 /** @name GlobalEnumerations
00069  *  These variables and constant arrays help align the data and keep everything
00070  *  consistent
00071  */
00072 //@{
00073 
00074 //! channel types enumerated should have same number as SC_MAX_CHANNELS
      /*! There are 16 enumerators, matching SC_MAX_CHANNELS. The enumerator
          order lines up entry-for-entry with the sc_channel_name and
          sc_channel_name_abv tables, so any change here must be mirrored
          there. The trailing comments give the sc_channel_name entry found at
          the same position. Nothing in this header enforces the count or the
          ordering. */
00075 enum sc_channels
00076 {
00077   SC_DR0,  // "Direction 0"
00078   SC_DR1,  // "Direction 1"
00079   SC_DR2,  // "Direction 2"
00080   SC_DR3,  // "Direction 3"
00081   SC_GA0,  // "Orientation 0"
00082   SC_GA1,  // "Orientation 1"
00083   SC_GA2,  // "Orientation 2"
00084   SC_GA3,  // "Orientation 3"
00085   SC_IN,   // "Intensity"
00086   SC_FL,   // "Flicker"
00087   SC_RG,   // "Red/Green"
00088   SC_BY,   // "Blue/Yellow"
00089   SC_H1,   // "H2SV H1"
00090   SC_H2,   // "H2SV H2"
00091   SC_HS,   // "H2SV S"
00092   SC_HV    // "H2SV V"
00093 };
00094 
      //! Long display name for each channel slot (SC_MAX_CHANNELS entries).
      /*! Entries are listed in the same order as the sc_channels enumerators,
          so the table is presumably indexed by an sc_channels value - TODO
          confirm against the callers. */
00095 const std::string sc_channel_name[SC_MAX_CHANNELS] =
00096 {
00097   "Direction 0",
00098   "Direction 1",
00099   "Direction 2",
00100   "Direction 3",
00101   "Orientation 0",
00102   "Orientation 1",
00103   "Orientation 2",
00104   "Orientation 3",
00105   "Intensity",
00106   "Flicker",
00107   "Red/Green",
00108   "Blue/Yellow",
00109   "H2SV H1",
00110   "H2SV H2",
00111   "H2SV S",
00112   "H2SV V"
00113 };
00114 
      //! Abbreviated name for each channel slot (SC_MAX_CHANNELS entries).
      /*! Entries are listed in the same order as the sc_channels enumerators
          and as the long names in sc_channel_name. The abbreviations contain
          no spaces or slashes; presumably they are meant for file names or
          labels - TODO confirm against the callers. */
00115 const std::string sc_channel_name_abv[SC_MAX_CHANNELS] =
00116 {
00117   "Dir_0",
00118   "Dir_1",
00119   "Dir_2",
00120   "Dir_3",
00121   "Ori_0",
00122   "Ori_1",
00123   "Ori_2",
00124   "Ori_3",
00125   "Intens",
00126   "Flicker",
00127   "RG",
00128   "BY",
00129   "H1",
00130   "H2",
00131   "HS",
00132   "HV"
00133 };
00134 //@}
00135 
00136 
00137 /*************************************************************************/
00138 //! Control surprise in a movie
      //! Template parameters, as used by the declarations below:
      //!  - PIXTYPE:  pixel type of the frame buffer and of the raw input,
      //!              intermediate and output images
      //!  - BETATYPE: pixel type of the beta (temporally smoothed surprise) images
      //!  - FLOAT:    scalar type of the biases and kernels, and pixel type of the
      //!              saliency, conspicuity, mask and local bias maps
      //! Only declarations are visible in this header; notes marked "presumably"
      //! or NOTE(review) must be confirmed against the implementation.
00139 template <class PIXTYPE, class BETATYPE, class FLOAT> class SurpriseControl
00140 {
00141 public:
        //! Construct for a given image size; takes the same arguments as SCinit
        //! (presumably performs the same initialization - TODO confirm)
00142   SurpriseControl(const ushort sizeX, const ushort sizeY);
        //! Default constructor; presumably SCinit must be called before use - TODO confirm
00143   SurpriseControl();
        //! Destructor
00144   ~SurpriseControl();
00145   /** @name InputAndInit
00146    * These methods are used to init the object and input maps and variables
00147    * that affect the way SurpriseControl will work. These methods only need
00148    * to be called once
00149    */
00150   //@{
00151   //! Init the basic variables
00152   void SCinit(const ushort sizeX, const ushort sizeY);
        //! set the master bias, which is applied equally to all channels
00153   void SCsetMasterConspicBias(const FLOAT bias);
00154   //! use max level from conspicuity map rather than sum the levels
00155   void SCuseMaxLevel(const bool useML);
00156   //! Should we use the temporal component? default is yes
00157   void SCuseTemporal(const bool useTMP);
00158   //! normalize the biases with scale
00159   void SCnormalizeBiasWithScale(const bool useNBS);
00160   //! Tell us we are going to use this channel and set the bias
        //! NOTE(review): despite its name, chan is presumably the bias value and
        //! chan_enum the sc_channels value selecting the channel - TODO confirm
00161   void SCsetConspicBias(const FLOAT chan,
00162                         const int chan_enum);
00163   //! give me my scale if any
00164   void SCsetMyScale(const ushort scale);
00165   //! set the bias over a single axis
00166   void SCsetAxisBias(const FLOAT X, const FLOAT Y, const FLOAT Z);
00167   //! set the bias per H2SV channel on output
00168   void SCsetH2SVBias(const FLOAT H1, const FLOAT H2,
00169                      const FLOAT S,  const FLOAT V);
00170   //! set the decay term over the beta map
00171   void SCsetLambda(const FLOAT lambda);
00172   //! set which frame if any is a target frame
00173   void SCsetTargetFrame(const uint frame);
00174   //! set how much the original image will be combined back
00175   void SCsetOriginalImageWeight(const FLOAT origImageBias);
00176   //! create the separable filters per image from a single size argument
        //! NOTE(review): the "Anderson" kernel shape is not visible from this header
00177   void SCcreateAndersonSepFilters(const ushort size);
00178   //! create the separable filters per image from spatial and temporal sigmas
        //! (stdDevSize presumably sets the kernel extent in standard deviations - TODO confirm)
00179   void SCcreateSepFilters(const FLOAT spatSigma,
00180                           const FLOAT tempSigma,
00181                           const FLOAT stdDevSize);
00182   //! find parameters over convolution
00183   void SCfindConvolutionEndPoints();
00184   //@}
00185 
00186   /** @name InputPerFrame
00187    * These methods need to be fed data per frame such as the saliency map
00188    * and the raw image for each frame. These should be called each frame.
00189    */
00190   //@{
00191   //! input the raw image frame for processing
00192   void SCinputRawImage(const Image<PixRGB<FLOAT> >& rawImage);
00193   //! input the salmap for this frame
00194   void SCinputSalMap(const Image<FLOAT>& salMap);
00195   //! input the conspicuity map for this frame for the channel given by cmap_enum
00196   void SCinputConspicMap(const Image<FLOAT>& cmap,const int cmap_enum);
00197   //@}
00198 
00199   /** @name InputOptional
00200    *  These methods allow optional masks or bayes weight images to be
00201    *  set
00202    */
00203   //@{
00204   //! Input a bayes weight image to bias surprise reduction
00205   void SCinputBayesWeightImage(const Image<FLOAT> &bayesImage);
00206   //! Input an independent mask to control where we apply the filter
00207   void SCinputMaskImage(const Image<FLOAT> &maskImage);
00208   //@}
00209 
00210   /** @name RunPerFrame
00211    *  Call to SCprocessFrameSeperable to run each frame. It should call
00212    *  SCcomputeNewBeta and SCseperateConv for you.
00213    */
00214   //@{
00215   //! compute new betaImage values
00216   void SCcomputeNewBeta();
00217   //! process a single frame of video using separable filters
00218   void SCprocessFrameSeperable();
00219   //! find the local channel biases
00220   void SCcomputeLocalBias();
00221   //! Helper method to call SCseperateConv over x,y and z
00222   void SCseperateConvXYZ();
00223   //! Helper method to call SCseperateConv over x,y but not z
00224   void SCseperateConvXY();
00225   //! process each axis by itself
        //! (axis presumably selects one of x, y or z - TODO confirm accepted values)
00226   void SCseperateConv(const char axis);
00227   //@}
00228 
00229   /** @name Output
00230    * Output from each frame can be obtained by calling these methods
00231    */
00232   //@{
00233   //! get the alternative surprise increased version of the image
00234   Image<PixRGB<FLOAT> > SCgetSharpened(const PIXTYPE scale_factor) const;
00235   //! process and return the final image
00236   Image<PixRGB<FLOAT> > SCgetFrame() const;
00237   //! process and return the filtered image
00238   Image<PixRGB<FLOAT> > SCgetOutImage() const;
00239   //! Return the temporal offset input image
00240   Image<PixRGB<FLOAT> > SCgetInImage() const;
00241   //! get the raw PIXTYPE output image
00242   Image<PIXTYPE> SCgetRawOutImage() const;
00243   //! get the input image converted as PIXTYPE
00244   Image<PIXTYPE> SCgetRawInImage() const;
00245   //! get the beta image of smoothed salMaps used in filter
00246   Image<BETATYPE> SCgetBetaImage() const;
00247   //! Get the temporal offset we are using
00248   ushort SCgetTemporalOffset() const;
00249   //! Is the output ready?
00250   bool   SCisOutputReady() const;
00251   //! Get the local bias maps proportional to effect
        //! (results are written into the four reference arguments)
00252   void   SCgetLocalBiasImages(Image<FLOAT> &H1, Image<FLOAT> &H2,
00253                               Image<FLOAT> &S,  Image<FLOAT>  &V) const;
00254   //! Get the y and z parts of the separable filter
        //! (results are written into the two reference arguments)
00255   void   SCgetSeperableParts(Image<PIXTYPE> &Zimg, Image<PIXTYPE> &Yimg) const;
00256 
00257   //@}
00258 private:
00259   /** @name FrameBuffers
00260    *  These hold full frames to allow the temporal component to work
00261    *  The frame buffer holds raw frames while the beta image stores
00262    *  the fully weighted surprise bias that should correspond with the
00263    *  image
00264    */
00265   //@{
00266   //! The basic frame buffer, holds either smoothed or non-smoothed
00267   std::deque<Image<PIXTYPE> >  itsFrameBuffer;
00268   //! Pointer to the current frame in the frame buffer
00269   Image<PIXTYPE>               *itsFrameCurrent;
00270   //! Temporal smoothed surprise maps
00271   std::deque<Image<BETATYPE> > itsBetaImage;
00272   //! Pointer to the current frame in the beta image
00273   Image<BETATYPE>              *itsBetaCurrent;
00274   //@}
00275   /** @name MiscImageHolders
00276    *  These are images that hold a variety of intermediate images,
00277    *  biases and input-output images
00278    */
00279   //@{
00280   //! intermediate processed image
00281   std::vector<Image<PIXTYPE> >     itsInterImage;
00282   //! input conspicuity maps, one slot per channel (SC_MAX_CHANNELS slots)
00283   Image<FLOAT>          itsConspicMap[SC_MAX_CHANNELS];
00284   //! Set of biases for each channel conspicuity map
00285   FLOAT                 itsConspicMapBias[SC_MAX_CHANNELS];
00286   //! are we using this channel, set by setting the bias
00287   bool                  itsUseConspicMap[SC_MAX_CHANNELS];
00288   //! The input for this frame
00289   Image<PIXTYPE> itsInImage;
00290   //! The output for this frame
00291   Image<PIXTYPE>        itsOutImage;
00292   //! The final image after we process by the surprise map
00293   Image<PixRGB<FLOAT> >         itsFinalImage;
00294   //@}
00295 
00296   /** @name BasicBiasImages
00297    *  These are images that store masks and biases used to weight the
00298       operations of the convolution operations. Per channel weights are
00299       stored as a betatype image.
00300   */
00301   //@{
00302   //! the current saliency map
00303   Image<FLOAT>           itsSalMap;
00304   //! Special independent mask image in addition to surprise mask
00305   Image<FLOAT>           itsMaskImage;
00306   //! input a bayesian bias image
00307   Image<FLOAT>                  itsBayesWeightImage;
00308   //! Local bias for H1, applied after accounting for channels
00309   Image<FLOAT>                  itsLocalBiasH1;
00310   //! Local bias for H2, applied after accounting for channels
00311   Image<FLOAT>                  itsLocalBiasH2;
00312   //! Local bias for S, applied after accounting for channels
00313   Image<FLOAT>                  itsLocalBiasS;
00314   //! Local bias for V, applied after accounting for channels
00315   Image<FLOAT>                  itsLocalBiasV;
00316   //@}
00317 
00318   /** @name ConvolutionGroup
00319    *  These are private members for storing pointers and indexes that
00320    *  provide a basis for convolution so that things such as offsets
00321    *  and kernel placement do not have to be computed on the fly
00322   */
00323   //@{
00324   //! store the start point for this convolution in X
00325   Image<ushort>                 itsXStart;
00326   //! store the start point for this convolution in Y
00327   Image<ushort>                 itsYStart;
00328   //! store the start point for this convolution in Z
00329   Image<ushort>                 itsZStart;
00330   //! store the stop point for this convolution in X
00331   Image<ushort>                 itsXStop;
00332   //! store the stop point for this convolution in Y
00333   Image<ushort>                 itsYStop;
00334   //! store the stop point for this convolution in Z
00335   Image<ushort>                 itsZStop;
00336   //! store the start point in X; the K prefix presumably denotes the kernel start index - TODO confirm
00337   Image<ushort>                 itsKXStart;
00338   //! store the start point in Y; the K prefix presumably denotes the kernel start index - TODO confirm
00339   Image<ushort>                 itsKYStart;
00340   //! store the start point in Z; the K prefix presumably denotes the kernel start index - TODO confirm
00341   Image<ushort>                 itsKZStart;
00342   //@}
00343 
00344   //! is true if saliency is very small at this location
00345   Image<bool>                   itsSmallSaliency;
00346   //! store messages that tell us if we did not init something
00347   std::vector<std::string>      itsInitMessage;
00348   /** @name KernelParts
00349    *  These are the three dimensions of a separable convolution kernel
00350    */
00351   //@{
00352   //! store the X part of the kernel
00353   std::vector<FLOAT>            itsKalmanKernelX;
00354   //! store the Y part of the kernel
00355   std::vector<FLOAT>            itsKalmanKernelY;
00356   //! store the Z part of the kernel
00357   std::vector<FLOAT>            itsKalmanKernelZ;
00358   //@}
00359 
00360   //! init flags; presumably one per entry of itsInitMessage, marking what has been initialized - TODO confirm
00361   std::vector<bool>             itsInit;
00362   //! blank pixel used in run time typing
00363   PixHyper<FLOAT,SC_MAX_CHANNELS> itsHyper;
00364   //! set to true when the frame buffer is full
00365   bool                   itsBufferFull;
        //! NOTE(review): undocumented flag; presumably tracks whether the frame buffer has been initialized - confirm
00366   bool                   itsInitBuffer;
00367   /** @name ConstantBiases
00368    *  These are the constant biases used to affect the whole process
00369    *  For instance, we can bias each axis of x,y and z independently if we want
00370    *  or we can bias each channel.
00371    *
00372    */
00373   //@{
00374   //! the channel biases
00375   FLOAT                         itsConBias[SC_MAX_CHANNELS];
00376   //! A master bias which is applied equally to all channels
00377   FLOAT                         itsMasterConspicBias;
00378   //! The decay term over beta
00379   FLOAT                         itsLambda;
00380   //! Bias over the X axis
00381   FLOAT                         itsXBias;
00382   //! Bias over the Y axis
00383   FLOAT                         itsYBias;
00384   //! Bias over the Z axis
00385   FLOAT                         itsZBias;
00386   //! Bias to H1
00387   FLOAT                         itsH1Bias;
00388   //! Bias to H2
00389   FLOAT                         itsH2Bias;
00390   //! Bias to S
00391   FLOAT                         itsSBias;
00392   //! Bias to V
00393   FLOAT                         itsVBias;
00394   //! how much of the original image should we add back
00395   FLOAT                         itsOriginalImageWeight;
00396   //@}
00397 
00398   /** @name ConstantSizesAndOffsets
00399    *  These define constant sizes for the kernel and input image as well as
00400    *  offsets to target frame.
00401    */
00402   //@{
00403   //! What is the target frame if any
00404   uint                          itsTargetFrame;
00405   //! Internal Iteration counter
00406   uint                          itsIterCounter;
00407   //! the size of the X kernel
00408   ushort                        itsKernelSizeX;
00409   //! the size of the Y kernel
00410   ushort                        itsKernelSizeY;
00411   //! the size of the Z kernel
00412   ushort                        itsKernelSizeZ;
00413   //! the size of the image in X
00414   ushort                        itsImageSizeX;
00415   //! the size of the image in Y
00416   ushort                        itsImageSizeY;
00417   //! the size of the image in Z (deque size)
00418   ushort                        itsImageSizeZ;
00419   //! offset on when we start the temporal component
00420   ushort                        itsTemporalOffset;
00421   //! My scale if known
00422   ushort                        itsScale;
00423   //@}
00424 
00425   //! Use max or combined surprise values
00426   bool                          itsUseMaxLevel;
00427   //! Use correlation matrix for biased surprise removal
00428   bool                          itsUseCorrMatrixSet;
00429   //! turn on using bayesian weighting of surprise reduction
00430   bool                          itsUseBayesWeightImage;
00431   //! use another independent mask image
00432   bool                          itsUseMaskImage;
00433   //! Set to true if you have a target frame
00434   bool                          itsUseTargetFrame;
00435   //! are we ready to give output
00436   bool                          itsOutputReady;
00437   //! Should we use the temporal component or not?
00438   bool                          itsUseTemporal;
00439   //! Should we normalize the H1,H2,S and V bias with scale?
00440   bool                          itsNormalizeBiasWithScale;
00441 };
00442 #endif
00443