00001 /*!@file Neuro/SimulationViewerCompress.H multi-foveated saliency-based 00002 compression */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2003 // 00006 // by the University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/SimulationViewerCompress.H $ 00036 // $Id: SimulationViewerCompress.H 11546 2009-07-31 18:19:09Z lzc $ 00037 // 00038 00039 #ifndef SIMULATIONVIEWERCOMPRESS_H_DEFINED 00040 #define SIMULATIONVIEWERCOMPRESS_H_DEFINED 00041 00042 #include "Component/ModelParam.H" 00043 #include "Image/ImageCache.H" 00044 #include "Image/ImageSet.H" 00045 #include "Image/LevelSpec.H" 00046 #include "Neuro/NeuroSimEvents.H" 00047 #include "Neuro/SimulationViewer.H" 00048 #include "Simulation/SimEvents.H" 00049 #include "Util/SimTime.H" 00050 00051 #include <vector> 00052 00053 class SaccadeController; 00054 class SpatialMetrics; 00055 00056 //! Do multi-foveated saliency-based image compression 00057 /*! This viewer will consider the top N salient locations and apply 00058 blurring to the image that increases in strength (sigma) as we get 00059 farther away from any of the top N most salient locations. The 00060 resulting trajectory image hence will be crisp at the top N 00061 locations and increasingly blurred away from those. Compressing the 00062 output of this viewer, e.g., using MPEG, yields smaller file size 00063 while (hopefully) preserving high quality at the important image 00064 locations. This viewer relies on a collection of N 00065 SaccadeControllers to track the N hotspots; you can pick which type 00066 of controller to use and configure their parameters as usual, via 00067 the command line. Note that in programs like ezvision that use an 00068 StdBrain, the StdBrain also contains a SaccadeController; but here 00069 we will explicitly drop it, as we won;t be using Brain's controller, 00070 since we will decide on the N hotspots here based onthe raw saliency 00071 map. We will also drop Brain's ShapeEstimator and force Brain's IOR 00072 type to none, so that we have a clean saliency map here to work 00073 with. */ 00074 class SimulationViewerCompress : public SimulationViewer { 00075 public: 00076 // ###################################################################### 00077 /*! @name Constructors and destructors */ 00078 //@{ 00079 00080 //! Constructor. See ModelComponent.H. 00081 SimulationViewerCompress(OptionManager& mgr, 00082 const std::string& descrName = 00083 "Multi-Foveated Compression Simulation Viewer", 00084 const std::string& tagName = 00085 "SimulationViewerCompress"); 00086 00087 //! Destructor 00088 virtual ~SimulationViewerCompress(); 00089 00090 //@} 00091 00092 protected: 00093 //! Callback for when a new retina image is available 00094 SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventRetinaImage); 00095 00096 //! Callback for when the eye moves 00097 SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventSaccadeStatusEye); 00098 00099 //! Callback for every time we should save our outputs 00100 SIMCALLBACK_DECLARE(SimulationViewerCompress, SimEventSaveOutput); 00101 00102 //! Get the attention/eye/head trajectory image 00103 Image< PixRGB<byte> > getTraj(SimEventQueue& q); 00104 00105 OModelParam<int> itsFOAradius; //!< FOA radius for object trackers 00106 OModelParam<int> itsNumFoveas; //!< number of foveas 00107 OModelParam<bool> itsSaveTraj; //!< save trajectory? 00108 OModelParam<bool> itsSaveMegaCombo; //!< save mega combo? 00109 OModelParam<bool> itsSaveMask; //!< save mask? 00110 OModelParam<bool> itsSaveFoveatedImage; //!< save foveated image? 00111 OModelParam<float> itsDistanceFactor; //!< distance factor to change the fovea size 00112 OModelParam<bool> itsSaveEyeCombo; //!< save eye combo? 00113 OModelParam<bool> itsDisplayPatch; //!< draw patches 00114 OModelParam<bool> itsDisplayFOA; //!< draw objetc outlines 00115 OModelParam<bool> itsDisplayEye; //!< draw human eye movements 00116 NModelParam< PixRGB<byte> > itsColorNormal; //!< patch color 00117 NModelParam< PixRGB<byte> > itsColorEye; //!< patch color for human eye mvts 00118 OModelParam<int> itsHeadRadius; //!< head radius 00119 OModelParam<int> itsMultiRetinaDepth; //!< depth of blur pyramid 00120 OModelParam<int> itsCacheSize; //!< size of our mask cache 00121 OModelParam<bool> itsUseTRMmax; //!< use TRM to take max in cache 00122 OModelParam<std::string> itsFoveaSCtype; //!< type of SC for foveas 00123 OModelParam<std::string> itsOutFname; //!< Name of output file 00124 OModelParam<LevelSpec> itsLevelSpec; //!< our levelspec 00125 OModelParam<int> itsNumRandomSamples; //!< number of random samples 00126 OModelParam<bool> itsEyeCompare; //!< do the eye compare with the mask 00127 00128 //! This parameter is the period (in frames) for foveation mask to change 00129 /*! The idea here is to see whether keeping the foveation mask 00130 stable for predicted frames and changing it only for intra-coded 00131 frames may improve the compression ratio when using MPEG-1 00132 compression. Have a look at 00133 http://www.disctronics.co.uk/technology/video/video_mpeg.htm if 00134 you are not familiar with those various types of frames. In 00135 particular (quoted from that web site): 00136 00137 "I-frames (Intra coded frames) use DCT encoding only to compress a 00138 single frame without reference to any other frame in the sequence. 00139 [...] 00140 P-frames (Predicted frames) are coded as differences from the last 00141 I or P frame. The new P-frame is first predicted by taking the 00142 last I or P frame and 'predicting' the values of each new 00143 pixel. P-frames use Motion Prediction and DCT encoding. As a 00144 result P-frames will give a compression ratio better than I-frames 00145 but depending on the amount of motion present. The differences 00146 between the predicted and actual values are encoded. [...] 00147 B-frames (Bidirectional frames) are coded as differences from the 00148 last or next I or P frame. B-frames use prediction as for P-frames 00149 but for each block either the previous I or P frame is used or the 00150 next I or P frame. [...]" 00151 00152 So, in our case, changing the foveation mask on a P or B frame may 00153 yield lots of prediction errors, that can be reduced if we force 00154 the mask to be only allowed to change on I frames. Here we assume 00155 that the first frame is an I-frame. */ 00156 OModelParam<int> itsIFramePeriod; 00157 00158 //! Get started and disable any SC the Brain may want to use 00159 virtual void start1(); 00160 00161 //! get stopped 00162 virtual void stop1(); 00163 00164 //! Intercept people changing our number of foveas 00165 virtual void paramChanged(ModelParamBase* const param, 00166 const bool valueChanged, 00167 ParamClient::ChangeStatus* status); 00168 00169 private: 00170 ImageSet< PixRGB<byte> > itsMultiTraj; // used to foveate traj 00171 std::vector< nub::soft_ref<SaccadeController> > itsSC; // our SCs 00172 SimTime itsInputTime; // time of last input 00173 int itsFrame; // keep track of frame number 00174 00175 void buildSCC(); // build our SaccadeControllerConfigurators 00176 00177 float getSample(const Image<float>& smap, const Point2D<int>& p, 00178 const int radius) const; 00179 float getRandomSample(const Image<float>& smap, 00180 const int radius, const int n) const; 00181 00182 // create mask from SCs 00183 Image<byte> getMaskSC(const Image<float>& smf, SimEventQueue& q); 00184 00185 // create mask from SM 00186 Image<byte> getMaskSM(const Image<float>& smf); 00187 00188 // keep track of features being tracked by each SC: 00189 std::vector< std::vector<float> > itsFeatures; 00190 00191 // use a sliding image cache for our foveation masks: 00192 ImageCacheAvg<byte> itsMask; 00193 00194 // ignore SCs that have too low salience: 00195 std::vector<bool> itsIgnoreSC; 00196 00197 Image<byte> itsCurrentMask; // last I-frame mask computed 00198 00199 // eye movement comparison stuff: 00200 FILE *itsOutFile; 00201 00202 Image<byte> itsBlurMask; // final current blur mask to use 00203 std::deque<Point2D<int> > itsEyeData; // queued-up eye movement data over 1 frame 00204 00205 Image<PixRGB<byte> > itsRawInputCopy; //FIXME 00206 Rectangle itsRawInputRectangle; //FIXME 00207 }; 00208 00209 #endif 00210 00211 // ###################################################################### 00212 /* So things look consistent in everyone's emacs... */ 00213 /* Local Variables: */ 00214 /* indent-tabs-mode: nil */ 00215 /* End: */