PyramidFeatureExtractor.C

Go to the documentation of this file.
00001 /*!@file TIGS/PyramidFeatureExtractor.C */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TIGS/PyramidFeatureExtractor.C $
00035 // $Id: PyramidFeatureExtractor.C 12546 2010-01-12 15:46:00Z sychic $
00036 //
00037 
00038 #ifndef TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED
00039 #define TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED
00040 
00041 #include "TIGS/PyramidFeatureExtractor.H"
00042 
00043 #include "Image/CutPaste.H"
00044 #include "Image/ImageSet.H"
00045 #include "Image/PyramidOps.H"
00046 #include "Image/ShapeOps.H" // for downSize()
00047 #include "TIGS/Drawing.H"
00048 #include "TIGS/TigsOpts.H"
00049 #include "Transport/FrameOstream.H"
00050 #include "Util/sformat.H"
00051 #include "rutz/trace.h"
00052 
00053 namespace
00054 {
00055   float* insertLocalAvg(const Image<float>& img,
00056                         float* p, float* const stop,
00057                         const double factor)
00058   {
00059     Image<float> ds = downSize(img, 4, 4, 3);
00060 
00061     for (int i = 0; i < ds.getSize() && p < stop; ++i, ++p)
00062       *p = factor * ds[i];
00063 
00064     return p;
00065   }
00066 
00067   float* insertLocalMax(const Image<float>& img,
00068                         float* p, float* const stop,
00069                         const double factor)
00070   {
00071 
00072     if (p+16 > stop)
00073       return p;
00074 
00075     for (int y = 0; y < img.getHeight(); ++y)
00076       for (int x = 0; x < img.getWidth(); ++x)
00077         {
00078           const float val = factor * img[Point2D<int>(x,y)];
00079           const int pos =
00080             (y*4 / img.getHeight()) * 4 +
00081             (x*4 / img.getWidth());
00082 
00083           p[pos] = std::max(p[pos], val);
00084         }
00085 
00086     return p+16;
00087   }
00088 
00089   float* insertLocalVar(const Image<float>& img,
00090                         float* p, float* const stop,
00091                         const double factor)
00092   {
00093 
00094     if (p+16 > stop)
00095       return p;
00096 
00097     float ss[16] = { 0.0f }, ssq[16] = { 0.0f };
00098     int N[16] = { 0 };
00099 
00100     for (int y = 0; y < img.getHeight(); ++y)
00101       for (int x = 0; x < img.getWidth(); ++x)
00102         {
00103           const double val = img[Point2D<int>(x,y)];
00104           const int pos =
00105             (y*4 / img.getHeight()) * 4 +
00106             (x*4 / img.getWidth());
00107 
00108           ssq[pos] += val*val;
00109           ss[pos] += val;
00110           ++N[pos];
00111         }
00112 
00113     for (int i = 0; i < 16; ++i)
00114       {
00115         if (N[i] > 1 && (ss[i]/N[i]) > 0.0f)
00116           {
00117             const float numer1 = (ssq[i] - (ss[i]*ss[i]/N[i]));
00118             const float denom1 = (N[i]-1);
00119 
00120             if (numer1 > 0.0f && denom1 > 0.0f)
00121               // coefficient of variation = 100*stdev/mean
00122               p[i] = factor*100.0f*sqrt(numer1/denom1)/(ss[i]/N[i]);
00123           }
00124         else
00125           {
00126             p[i] = 0.0f;
00127           }
00128       }
00129 
00130     return p+16;
00131   }
00132 
00133   float* insertPyrFeatures(const ImageSet<float>& pyr,
00134                            float* p, float* const stop,
00135                            const double factor)
00136   {
00137     p = insertLocalAvg(pyr[2], p, stop, factor);
00138     p = insertLocalVar(pyr[2], p, stop, 1.5);
00139     p = insertLocalAvg(pyr[5], p, stop, factor);
00140     p = insertLocalVar(pyr[5], p, stop, 1.5);
00141 
00142     return p;
00143   }
00144 
00145   Image<PixRGB<byte> > illustrate1(const ImageSet<float>& pyr,
00146                                    const double factor,
00147                                    const char* name,
00148                                    const PixRGB<byte>& bg)
00149   {
00150     using tigs::labelImage;
00151     using tigs::boxify;
00152 
00153     const PixRGB<byte> red(255, 64, 64);
00154     const PixRGB<byte> green(96, 192, 96);
00155     const PixRGB<byte> blue(128, 128, 255);
00156     const PixRGB<byte> yellow(160, 160, 0);
00157 
00158     const Image<PixRGB<byte> > top = // 256x256
00159       labelImage(boxify(pyr[1] * float(factor), 8, green),
00160                  name, green, bg);
00161 
00162     const Image<PixRGB<byte> > mid = // 256x128
00163       concatX(labelImage(boxify(pyr[2] * float(factor), 4, yellow), "fine", yellow, bg),
00164               labelImage(boxify(zoomXY(pyr[5] * float(factor), 8, 8), 4, yellow), "coarse", yellow, bg));
00165 
00166     Image<float> avg2(4,4,NO_INIT);
00167     Image<float> var2(4,4,NO_INIT);
00168     Image<float> avg5(4,4,NO_INIT);
00169     Image<float> var5(4,4,NO_INIT);
00170 
00171     insertLocalAvg(pyr[2], &avg2[0], &avg2[0]+16, factor);
00172     insertLocalVar(pyr[2], &var2[0], &var2[0]+16, 1.5);
00173     insertLocalAvg(pyr[5], &avg5[0], &avg5[0]+16, factor);
00174     insertLocalVar(pyr[5], &var5[0], &var5[0]+16, 1.5);
00175 
00176     Image<PixRGB<byte> > cavg2 = avg2;
00177     Image<PixRGB<byte> > cvar2 = var2;
00178     Image<PixRGB<byte> > cavg5 = avg5;
00179     Image<PixRGB<byte> > cvar5 = var5;
00180 
00181     const Image<PixRGB<byte> > low2 =
00182       concatX(labelImage(boxify(zoomXY(cavg2, 16, 16), 2, red), "mean", red, bg),
00183               labelImage(boxify(zoomXY(cvar2, 16, 16), 2, blue), "var", blue, bg));
00184 
00185     const Image<PixRGB<byte> > low5 =
00186       concatX(labelImage(boxify(zoomXY(cavg5, 16, 16), 2, red), "mean", red, bg),
00187               labelImage(boxify(zoomXY(cvar5, 16, 16), 2, blue), "var", blue, bg));
00188 
00189     const Image<PixRGB<byte> > low = concatX(low2, low5);
00190 
00191     return concatY(concatY(top, mid), low);
00192   }
00193 
00194   void saveRaw1(const ImageSet<float>& pyr,
00195                 const double factor,
00196                 const char* name,
00197                 FrameOstream& ofs)
00198   {
00199     // ofs.writeFloat(pyr[0], FLOAT_NORM_PRESERVE,
00200     //             (sformat("%s-base", name)));
00201 
00202     //  ofs.writeFloat(pyr[2], FLOAT_NORM_PRESERVE,
00203     //               (sformat("%s-fine", name)));
00204 
00205   // ofs.writeFloat(pyr[5], FLOAT_NORM_PRESERVE,
00206     //             (sformat("%s-coarse", name)));
00207 
00208     Image<float> avg2(4,4,NO_INIT);
00209     Image<float> var2(4,4,NO_INIT);
00210     Image<float> avg5(4,4,NO_INIT);
00211     Image<float> var5(4,4,NO_INIT);
00212 
00213     insertLocalAvg(pyr[2], &avg2[0], &avg2[0]+16, factor);
00214     insertLocalVar(pyr[2], &var2[0], &var2[0]+16, 1.5);
00215     insertLocalAvg(pyr[5], &avg5[0], &avg5[0]+16, factor);
00216     insertLocalVar(pyr[5], &var5[0], &var5[0]+16, 1.5);
00217 
00218     ofs.writeFloat(avg2, FLOAT_NORM_PRESERVE,
00219                    (sformat("%s-fine-avg", name)));
00220 
00221     ofs.writeFloat(var2, FLOAT_NORM_PRESERVE,
00222                    (sformat("%s-fine-var", name)));
00223 
00224     ofs.writeFloat(avg5, FLOAT_NORM_PRESERVE,
00225                    (sformat("%s-coarse-avg", name)));
00226 
00227     ofs.writeFloat(var5, FLOAT_NORM_PRESERVE,
00228                    (sformat("%s-coarse-var", name)));
00229   }
00230 }
00231 
00232 PyramidFeatureExtractor::PyramidFeatureExtractor(OptionManager& mgr) :
00233   FeatureExtractor(mgr, "pfx"),
00234   itsSaveIllustrations(&OPT_FxSaveIllustrations, this),
00235   itsSaveRawMaps(&OPT_FxSaveRawMaps, this)
00236 {}
00237 
00238 PyramidFeatureExtractor::~PyramidFeatureExtractor() {}
00239 
00240 Image<PixRGB<byte> > PyramidFeatureExtractor::
00241 illustrate(const TigsInputFrame& fin) const
00242 {
00243   if (fin.isGhost())
00244     LFATAL("PyramidFeatureExtractor needs non-ghost frames");
00245 
00246   const PixRGB<byte> bg(255, 255, 255);
00247 
00248   Image<PixRGB<byte> > top =
00249     illustrate1(buildPyrGaussian(fin.lum(), 0, 10, 9), 1.0, "luminance", bg);
00250 
00251   top = concatX(top,
00252                 illustrate1(buildPyrGaussian(fin.rg(), 0, 10, 9), 1.0, "red/green", bg));
00253 
00254   top = concatX(top,
00255                 illustrate1(buildPyrGaussian(fin.by(), 0, 10, 9), 1.0, "blue/yellow", bg));
00256 
00257   const double angles[] = { 0.0, 45.0, 90.0, 135.0 };
00258 
00259   Image<PixRGB<byte> > bottom;
00260 
00261   for (int i = 0; i < 4; i += 2)
00262     {
00263       Image<PixRGB<byte> > x =
00264         illustrate1(buildPyrOriented(fin.lum(), 0, 10, 9,
00265                                      angles[i], 15.0),
00266                     5.0,
00267                     sformat("%d degrees", int(angles[i])).c_str(), bg);
00268 
00269       if (bottom.initialized())
00270         bottom = concatX(bottom, x);
00271       else
00272         bottom = x;
00273     }
00274 
00275   Image<PixRGB<byte> > result = concatX(top, bottom);
00276   return rescale(result,
00277                  int(0.75*result.getWidth()),
00278                  int(0.75*result.getHeight()));
00279 }
00280 
00281 void PyramidFeatureExtractor::
00282 saveRawIllustrationParts(const TigsInputFrame& fin,
00283                          FrameOstream& ofs) const
00284 {
00285   saveRaw1(buildPyrGaussian(fin.lum(), 0, 10, 9), 1.0, "luminance", ofs);
00286 
00287   saveRaw1(buildPyrGaussian(fin.rg(), 0, 10, 9), 1.0, "red-green", ofs);
00288 
00289   saveRaw1(buildPyrGaussian(fin.by(), 0, 10, 9), 1.0, "blue-yellow", ofs);
00290 
00291   const double angles[] = { 0.0, 45.0, 90.0, 135.0 };
00292 
00293   for (int i = 0; i < 4; ++i)
00294     {
00295       saveRaw1(buildPyrOriented(fin.lum(), 0, 10, 9,
00296                                 angles[i], 15.0),
00297                5.0,
00298                sformat("ori%d", int(angles[i])).c_str(),
00299                ofs);
00300     }
00301 }
00302 
00303 void PyramidFeatureExtractor::saveResults(const TigsInputFrame& fin,
00304                                           FrameOstream& ofs) const
00305 {
00306   if (itsSaveIllustrations.getVal())
00307     ofs.writeRGB(this->illustrate(fin), "pfx");
00308 
00309   if (itsSaveRawMaps.getVal())
00310     this->saveRawIllustrationParts(fin, ofs);
00311 }
00312 
00313 Image<float> PyramidFeatureExtractor::doExtract(const TigsInputFrame& fin)
00314 {
00315   GVX_TRACE(__PRETTY_FUNCTION__);
00316 
00317   if (fin.isGhost())
00318     LFATAL("PyramidFeatureExtractor needs non-ghost frames");
00319 
00320   Image<float> result(4, 112, ZEROS);
00321 
00322   float*       p    = result.getArrayPtr();
00323   float* const stop = p + result.getSize();
00324 
00325   {
00326     GVX_TRACE("PyramidFeatureExtractor::extract-laplacian");
00327 
00328     p = insertPyrFeatures(buildPyrGaussian(fin.lum(), 0, 10, 9),
00329                           p, stop, 1.0);
00330 
00331     p = insertPyrFeatures(buildPyrGaussian(fin.rg(), 0, 10, 9),
00332                           p, stop, 1.0);
00333 
00334     p = insertPyrFeatures(buildPyrGaussian(fin.by(), 0, 10, 9),
00335                           p, stop, 1.0);
00336   }
00337 
00338   const double angles[] = { 0.0, 45.0, 90.0, 135.0 };
00339 
00340   for (int i = 0; i < 4; ++i)
00341     {
00342       GVX_TRACE("PyramidFeatureExtractor::extract-oriented");
00343 
00344       p = insertPyrFeatures(buildPyrOriented(fin.lum(), 0, 10, 9,
00345                                              angles[i], 15.0),
00346                             p, stop, 15.0);
00347     }
00348 
00349   return result;
00350 }
00351 
00352 // ######################################################################
00353 /* So things look consistent in everyone's emacs... */
00354 /* Local Variables: */
00355 /* mode: c++ */
00356 /* indent-tabs-mode: nil */
00357 /* End: */
00358 
00359 #endif // TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED