SimulationViewerCompress.H

Go to the documentation of this file.
00001 /*!@file Neuro/SimulationViewerCompress.H multi-foveated saliency-based
00002   compression */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2003   //
00006 // by the University of Southern California (USC) and the iLab at USC.  //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/SimulationViewerCompress.H $
00036 // $Id: SimulationViewerCompress.H 11546 2009-07-31 18:19:09Z lzc $
00037 //
00038 
00039 #ifndef SIMULATIONVIEWERCOMPRESS_H_DEFINED
00040 #define SIMULATIONVIEWERCOMPRESS_H_DEFINED
00041 
00042 #include "Component/ModelParam.H"
00043 #include "Image/ImageCache.H"
00044 #include "Image/ImageSet.H"
00045 #include "Image/LevelSpec.H"
00046 #include "Neuro/NeuroSimEvents.H"
00047 #include "Neuro/SimulationViewer.H"
00048 #include "Simulation/SimEvents.H"
00049 #include "Util/SimTime.H"
00050 
00051 #include <vector>
00052 
00053 class SaccadeController;
00054 class SpatialMetrics;
00055 
00056 //! Do multi-foveated saliency-based image compression
00057 /*! This viewer will consider the top N salient locations and apply
00058   blurring to the image that increases in strength (sigma) as we get
00059   farther away from any of the top N most salient locations. The
00060   resulting trajectory image hence will be crisp at the top N
00061   locations and increasingly blurred away from those. Compressing the
00062   output of this viewer, e.g., using MPEG, yields smaller file size
00063   while (hopefully) preserving high quality at the important image
00064   locations. This viewer relies on a collection of N
00065   SaccadeControllers to track the N hotspots; you can pick which type
00066   of controller to use and configure their parameters as usual, via
00067   the command line. Note that in programs like ezvision that use an
00068   StdBrain, the StdBrain also contains a SaccadeController; but here
00069   we will explicitly drop it, as we won't be using Brain's controller,
00070   since we will decide on the N hotspots here based on the raw saliency
00071   map. We will also drop Brain's ShapeEstimator and force Brain's IOR
00072   type to none, so that we have a clean saliency map here to work
00073   with. */
00074 class SimulationViewerCompress : public SimulationViewer {
00075 public:
00076   // ######################################################################
00077   /*! @name Constructors and destructors */
00078   //@{
00079 
00080   //! Constructor. See ModelComponent.H.
00081   SimulationViewerCompress(OptionManager& mgr,
00082                            const std::string& descrName =
00083                            "Multi-Foveated Compression Simulation Viewer",
00084                            const std::string& tagName =
00085                            "SimulationViewerCompress");
00086 
00087   //! Destructor
00088   virtual ~SimulationViewerCompress();
00089 
00090   //@}
00091 
00092 protected:
00093   //! Callback for when a new retina image is available
00094   SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventRetinaImage);
00095 
00096   //! Callback for when the eye moves
00097   SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventSaccadeStatusEye);
00098 
00099   //! Callback for every time we should save our outputs
00100   SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventSaveOutput);
00101 
00102   //! Get the attention/eye/head trajectory image
00103   Image< PixRGB<byte> > getTraj(SimEventQueue& q);
00104 
00105   OModelParam<int> itsFOAradius;        //!< FOA radius for object trackers
00106   OModelParam<int> itsNumFoveas;        //!< number of foveas
00107   OModelParam<bool> itsSaveTraj;        //!< save trajectory?
00108   OModelParam<bool> itsSaveMegaCombo;   //!< save mega combo?
00109   OModelParam<bool> itsSaveMask;        //!< save mask?
00110   OModelParam<bool> itsSaveFoveatedImage;   //!< save foveated image?
00111   OModelParam<float> itsDistanceFactor;     //!< distance factor to change the fovea size
00112   OModelParam<bool> itsSaveEyeCombo;    //!< save eye combo?
00113   OModelParam<bool> itsDisplayPatch;    //!< draw patches
00114   OModelParam<bool> itsDisplayFOA;      //!< draw object outlines
00115   OModelParam<bool> itsDisplayEye;      //!< draw human eye movements
00116   NModelParam< PixRGB<byte> > itsColorNormal; //!< patch color
00117   NModelParam< PixRGB<byte> > itsColorEye;  //!< patch color for human eye movements
00118   OModelParam<int> itsHeadRadius;           //!< head radius
00119   OModelParam<int> itsMultiRetinaDepth;     //!< depth of blur pyramid
00120   OModelParam<int> itsCacheSize;            //!< size of our mask cache
00121   OModelParam<bool> itsUseTRMmax;           //!< use TRM to take max in cache
00122   OModelParam<std::string> itsFoveaSCtype;  //!< type of SC for foveas
00123   OModelParam<std::string> itsOutFname;     //!< Name of output file
00124   OModelParam<LevelSpec> itsLevelSpec;      //!< our levelspec
00125   OModelParam<int> itsNumRandomSamples;     //!< number of random samples
00126   OModelParam<bool> itsEyeCompare;          //!< do the eye compare with the mask
00127 
00128   //! This parameter is the period (in frames) for the foveation mask to change
00129   /*! The idea here is to see whether keeping the foveation mask
00130     stable for predicted frames and changing it only for intra-coded
00131     frames may improve the compression ratio when using MPEG-1
00132     compression. Have a look at
00133     http://www.disctronics.co.uk/technology/video/video_mpeg.htm if
00134     you are not familiar with those various types of frames. In
00135     particular (quoted from that web site):
00136 
00137     "I-frames (Intra coded frames) use DCT encoding only to compress a
00138     single frame without reference to any other frame in the sequence.
00139     [...]
00140     P-frames (Predicted frames) are coded as differences from the last
00141     I or P frame. The new P-frame is first predicted by taking the
00142     last I or P frame and 'predicting' the values of each new
00143     pixel. P-frames use Motion Prediction and DCT encoding. As a
00144     result P-frames will give a compression ratio better than I-frames
00145     but depending on the amount of motion present. The differences
00146     between the predicted and actual values are encoded. [...]
00147     B-frames (Bidirectional frames) are coded as differences from the
00148     last or next I or P frame. B-frames use prediction as for P-frames
00149     but for each block either the previous I or P frame is used or the
00150     next I or P frame. [...]"
00151 
00152     So, in our case, changing the foveation mask on a P or B frame may
00153     yield lots of prediction errors, which can be reduced if we force
00154     the mask to be only allowed to change on I frames. Here we assume
00155     that the first frame is an I-frame. */
00156   OModelParam<int> itsIFramePeriod;
00157 
00158   //! Get started and disable any SC the Brain may want to use
00159   virtual void start1();
00160 
00161   //! Get stopped
00162   virtual void stop1();
00163 
00164   //! Intercept people changing our number of foveas
00165   virtual void paramChanged(ModelParamBase* const param,
00166                             const bool valueChanged,
00167                             ParamClient::ChangeStatus* status);
00168 
00169 private:
00170   ImageSet< PixRGB<byte> > itsMultiTraj; // used to foveate traj
00171   std::vector< nub::soft_ref<SaccadeController> > itsSC; // our SaccadeControllers (SCs)
00172   SimTime itsInputTime;                   // time of last input
00173   int itsFrame;                          // keep track of frame number
00174 
00175   void buildSCC(); // build our SaccadeControllerConfigurators
00176   // NOTE(review): sampling helpers below are undocumented; presumably they read smap around p within radius (n random draws) -- confirm against the implementation
00177   float getSample(const Image<float>& smap, const Point2D<int>& p,
00178                   const int radius) const;
00179   float getRandomSample(const Image<float>& smap,
00180                         const int radius, const int n) const;
00181 
00182   // create mask from the SaccadeControllers (SCs)
00183   Image<byte> getMaskSC(const Image<float>& smf, SimEventQueue& q);
00184 
00185   // create mask from SM (presumably the saliency map -- confirm)
00186   Image<byte> getMaskSM(const Image<float>& smf);
00187 
00188   // keep track of features being tracked by each SC:
00189   std::vector< std::vector<float> > itsFeatures;
00190 
00191   // use a sliding image cache for our foveation masks:
00192   ImageCacheAvg<byte> itsMask;
00193 
00194   // ignore SCs that have too low salience:
00195   std::vector<bool> itsIgnoreSC;
00196 
00197   Image<byte> itsCurrentMask;  // last I-frame mask computed
00198 
00199   // eye movement comparison stuff:
00200   FILE *itsOutFile;
00201   // NOTE(review): FILE and std::deque are used here, but <cstdio>/<deque> are not among this header's direct includes -- confirm they arrive transitively
00202   Image<byte> itsBlurMask;  // final current blur mask to use
00203   std::deque<Point2D<int> > itsEyeData;  // queued-up eye movement data over 1 frame
00204 
00205   Image<PixRGB<byte> > itsRawInputCopy; // FIXME (original marker; purpose not documented here)
00206   Rectangle itsRawInputRectangle; // FIXME (original marker; purpose not documented here)
00207 };
00208 
00209 #endif
00210 
00211 // ######################################################################
00212 /* So things look consistent in everyone's emacs... */
00213 /* Local Variables: */
00214 /* indent-tabs-mode: nil */
00215 /* End: */