test-GistRec.C

00001 /*!@file ObjRec/test-GistRec.C Feed orientation and color histograms of test scenes to SOFMs loaded from disk
00002  */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/ObjRec/test-GistRec.C $
00036 // $Id: test-GistRec.C 10794 2009-02-08 06:21:09Z itti $
00037 //
00038 
00039 #include "Component/ModelManager.H"
00040 #include "Image/Image.H"
00041 #include "Image/Pixels.H"
00042 #include "Image/ColorOps.H"
00043 #include "Image/ShapeOps.H"
00044 #include "Image/MathOps.H"
00045 #include "Image/CutPaste.H"
00046 #include "Image/DrawOps.H"
00047 #include "Image/FilterOps.H"
00048 #include "Image/Transforms.H"
00049 #include "Media/FrameSeries.H"
00050 #include "Raster/Raster.H"
00051 #include "Util/log.H"
00052 #include "Util/MathFunctions.H"
00053 #include "Learn/SOFM.H"
00054 #include "Transport/FrameInfo.H"
00055 #include "Raster/GenericFrame.H"
00056 #include "Media/TestImages.H"
00057 
//! Find the smallest and largest entries of a vector.
/*! @param vec values to scan
    @param min receives the smallest value found in vec
    @param max receives the largest value found in vec

    An empty vector has no extrema; in that case both outputs are set
    to 0.0 rather than reading vec[0], which would be undefined. */
void findMinMax(const std::vector<double> &vec, double &min, double &max)
{
  if (vec.empty())
  {
    min = 0.0;
    max = 0.0;
    return;
  }

  max = vec[0];
  min = max;
  for (std::size_t n = 1; n < vec.size(); ++n)
  {
    if (vec[n] > max) max = vec[n];
    if (vec[n] < min) min = vec[n];
  }
}
00068 
00069 Image<PixRGB<byte> > showHist(const std::vector<double> &hist, int loc)
00070 {
00071   int w = 256, h = 256;
00072   if (hist.size() > (uint)w) w = hist.size();
00073 
00074   if (hist.size() == 0) return Image<PixRGB<byte> >();
00075 
00076   int dw = w / hist.size();
00077   Image<byte> res(w, h, ZEROS);
00078 
00079   // draw lines for 10% marks:
00080   for (int j = 0; j < 10; j++)
00081     drawLine(res, Point2D<int>(0, int(j * 0.1F * h)),
00082              Point2D<int>(w-1, int(j * 0.1F * h)), byte(64));
00083   drawLine(res, Point2D<int>(0, h-1), Point2D<int>(w-1, h-1), byte(64));
00084 
00085   double minii, maxii;
00086   findMinMax(hist, minii, maxii);
00087 
00088    // uniform histogram
00089   if (maxii == minii) minii = maxii - 1.0F;
00090 
00091   double range = maxii - minii;
00092 
00093   for (uint i = 0; i < hist.size(); i++)
00094     {
00095       int t = abs(h - int((hist[i] - minii) / range * double(h)));
00096 
00097       // if we have at least 1 pixel worth to draw
00098       if (t < h-1)
00099         {
00100           for (int j = 0; j < dw; j++)
00101             drawLine(res,
00102                      Point2D<int>(dw * i + j, t),
00103                      Point2D<int>(dw * i + j, h - 1),
00104                      byte(255));
00105           //drawRect(res, Rectangle::tlbrI(t,dw*i,h-1,dw*i+dw-1), byte(255));
00106         }
00107     }
00108   return res;
00109 }
00110 
//! Smooth a histogram in place with a circular [1 2 1]/4 kernel.
/*! @param hist histogram to smooth; the first and last bins are treated
                as neighbors (indices wrap around)

    The scratch buffer is a std::vector rather than a variable-length
    array, so empty and very large histograms are handled safely.
    Arithmetic is kept in single precision so results match the
    previous implementation. */
void smoothHist(std::vector<double> &hist)
{
  const std::size_t siz = hist.size();
  if (siz == 0) return;

  std::vector<float> smoothed(siz);

  for (std::size_t n = 0; n < siz; ++n)
  {
    // "+ siz" keeps the index non-negative before the wrap-around modulo
    const float val0 = static_cast<float>(hist[(n + siz - 1) % siz]);
    const float val1 = static_cast<float>(hist[n]);
    const float val2 = static_cast<float>(hist[(n + 1) % siz]);

    smoothed[n] = 0.25F * (val0 + 2.0F * val1 + val2);
  }

  for (std::size_t n = 0; n < siz; ++n) hist[n] = smoothed[n];
}
00127 
00128 void normalizeHist(std::vector<double> &hist, double high, double low)
00129 {
00130 
00131   double oldmin, oldmax;
00132   findMinMax(hist, oldmin, oldmax);
00133 
00134    float scale = float(oldmax) - float(oldmin);
00135    //if (fabs(scale) < 1.0e-10F) scale = 1.0; // image is uniform
00136    const float nscale = (float(high) - float(low)) / scale;
00137 
00138    for(uint i=0; i<hist.size(); i++)
00139    {
00140      hist[i] = low + (float(hist[i]) - float(oldmin)) * nscale ;
00141    }
00142 
00143 
00144 }
00145 
00146 Point2D<int> processOriMap(Image<PixRGB<byte> > &inputImg,
00147     SOFM &sofm,
00148     int ii,
00149     nub::ref<OutputFrameSeries> &ofs)
00150 {
00151 
00152     //get Orientations
00153     Image<byte> lum = luminance(inputImg);
00154     Image<float> mag, ori;
00155     gradientSobel(lum, mag, ori, 3);
00156     std::vector<double> oriHist(360,0);
00157     for(int i=0; i<mag.getSize(); i++)
00158     {
00159       int deg = int(ori[i]*180/M_PI);
00160       if (deg < 0) deg+=360;
00161       oriHist[deg] += mag[i];
00162     }
00163     normalizeHist(oriHist, 0.0F, 255.0F);
00164     smoothHist(oriHist);
00165     ofs->writeRGB(showHist(oriHist,0), "Ori Hist");
00166     sofm.setInput(oriHist);
00167     sofm.propagate();
00168     Point2D<int> winner = sofm.getWinner();
00169     LINFO("Winner at %ix%i", winner.i, winner.j);
00170     Image<float> sofmOut = sofm.getMap();
00171     inplaceNormalize(sofmOut, 0.0F, 255.0F);
00172 
00173     drawCircle(sofmOut, winner, 6, 255.0F);
00174 
00175     ofs->writeRGB(sofmOut, "OriSOFM_act_map");
00176 
00177 
00178  //   sofm.SetLearningRate(ii);
00179  //   sofm.organize(oriHist);
00180     //inplaceNormalize(SMap, 0.0F, 255.0F);
00181     //
00182 
00183     //save the info every 100 triels
00184 //    if (!(ii%100))
00185 //      sofm.WriteNet("oriSofm.net");
00186 //
00187     return winner;
00188 }
00189 
00190 Point2D<int> processRGColMap(Image<PixRGB<byte> > &inputImg,
00191     SOFM &sofm,
00192     int ii,
00193     nub::ref<OutputFrameSeries> &ofs)
00194 {
00195 
00196     Image<float> rg,by;
00197     getRGBY(inputImg, rg, by, byte(25));
00198     inplaceNormalize(rg, 0.0F, 255.0F);
00199 
00200     std::vector<double> colHist(256,0);
00201     for(int i=0; i<rg.getSize(); i++)
00202     {
00203       int col = (int)rg[i];
00204       colHist[col]++;
00205     }
00206     normalizeHist(colHist, 0.0F, 255.0F);
00207     smoothHist(colHist);
00208     ofs->writeRGB(showHist(colHist,0), "ColRG Hist");
00209     sofm.setInput(colHist);
00210     sofm.propagate();
00211     Point2D<int> winner = sofm.getWinner();
00212     Image<float> sofmOut = sofm.getMap();
00213     inplaceNormalize(sofmOut, 0.0F, 255.0F);
00214 
00215     drawCircle(sofmOut, winner, 6, 255.0F);
00216 
00217     ofs->writeRGB(sofmOut, "colRGSOFM_act_map");
00218 
00219     //sofm.SetLearningRate(ii);
00220     //sofm.organize(colHist);
00221 
00222     //save the info every 100 triels
00223    // if (!(ii%100))
00224    //   sofm.WriteNet("colRGSofm.net");
00225 
00226     return winner;
00227 }
00228 
00229 Point2D<int> processBYColMap(Image<PixRGB<byte> > &inputImg,
00230     SOFM &sofm,
00231     int ii,
00232     nub::ref<OutputFrameSeries> &ofs)
00233 {
00234 
00235     Image<float> rg,by;
00236     getRGBY(inputImg, rg, by, byte(25));
00237     inplaceNormalize(by, 0.0F, 255.0F);
00238 
00239     std::vector<double> colHist(256,0);
00240     for(int i=0; i<by.getSize(); i++)
00241     {
00242       int col = (int)by[i];
00243       colHist[col]++;
00244     }
00245     normalizeHist(colHist, 0.0F, 255.0F);
00246     smoothHist(colHist);
00247     ofs->writeRGB(showHist(colHist,0), "ColBY Hist");
00248     sofm.setInput(colHist);
00249     sofm.propagate();
00250     Point2D<int> winner = sofm.getWinner();
00251     Image<float> sofmOut = sofm.getMap();
00252     inplaceNormalize(sofmOut, 0.0F, 255.0F);
00253 
00254     drawCircle(sofmOut, winner, 6, 255.0F);
00255 
00256     ofs->writeRGB(sofmOut, "ColBySOFM_act_map");
00257 
00258     //sofm.SetLearningRate(ii);
00259     //sofm.organize(colHist);
00260 
00261     //save the info every 100 triels
00262 //    if (!(ii%100))
00263 //      sofm.WriteNet("colBYSofm.net");
00264 //
00265     return winner;
00266 }
00267 
00268 Point2D<int> processSceneMap(
00269     Point2D<int> oriWinner,
00270     Point2D<int> rgWinner,
00271     Point2D<int> byWinner,
00272     SOFM &sofm,
00273     int ii,
00274     nub::ref<OutputFrameSeries> &ofs)
00275 {
00276 
00277     std::vector<double> input(6,0);
00278     input[0] = oriWinner.i;
00279     input[1] = oriWinner.j;
00280     input[2] = rgWinner.i;
00281     input[3] = rgWinner.j;
00282     input[4] = byWinner.i;
00283     input[5] = byWinner.j;
00284 
00285 
00286     sofm.setInput(input);
00287     sofm.propagate();
00288     Point2D<int> winner = sofm.getWinner();
00289     Image<float> sofmOut = sofm.getMap();
00290     inplaceNormalize(sofmOut, 0.0F, 255.0F);
00291 
00292     drawCircle(sofmOut, winner, 6, 255.0F);
00293 
00294     ofs->writeRGB(sofmOut, "SceneSOFM_act_map");
00295 
00296     //sofm.SetLearningRate(ii);
00297     //sofm.organize(input);
00298 
00299     //save the info every 100 triels
00300   //  if (!(ii%100))
00301   //    sofm.WriteNet("sceneSofm.net");
00302 
00303     return winner;
00304 }
00305 
00306 
00307 
00308 int main(int argc, char** argv)
00309 {
00310 
00311   MYLOGVERB = LOG_INFO;  // suppress debug messages
00312 
00313   // Instantiate a ModelManager:
00314   ModelManager manager("Test SOFM");
00315 
00316   nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager));
00317   manager.addSubComponent(ofs);
00318 
00319   // Parse command-line:
00320   if (manager.parseCommandLine((const int)argc, (const char**)argv, "<path to mages>", 1, 1) == false)
00321     return(1);
00322 
00323   manager.start();
00324 
00325   TestImages testImages(manager.getExtraArg(0).c_str(),
00326       TestImages::MIT_LABELME);
00327 
00328 #define SIZE 256
00329   SOFM oriSofm("oriSofm.net", 360, SIZE, SIZE);
00330   SOFM rgSofm("rgSofm.net", 256, SIZE, SIZE);
00331   SOFM bySofm("bySofm.net", 256, SIZE, SIZE);
00332   SOFM sceneSofm("sceneSofm.net", 6, SIZE, SIZE);
00333 
00334   //oriSofm.RandomWeights();
00335   //rgSofm.RandomWeights();
00336   //bySofm.RandomWeights();
00337   //sceneSofm.RandomWeights();
00338 
00339   oriSofm.ReadNet("oriSofm.net");
00340   rgSofm.ReadNet("colRGSofm.net");
00341   bySofm.ReadNet("colBYSofm.net");
00342   sceneSofm.ReadNet("sceneSofm.net");
00343 
00344   initRandomNumbers();
00345 
00346 
00347   // main loop:
00348   int ii=0;
00349 
00350   LINFO("Process input");
00351   while(1)
00352   {
00353     //choose a scene at random
00354     int scene = randomUpToIncluding(testImages.getNumScenes()-1);
00355 
00356     Image<PixRGB<byte> > inputImg = testImages.getScene(scene);
00357 
00358     ofs->writeRGB(inputImg, "Input");
00359 
00360     Point2D<int> oriWinner = processOriMap(inputImg, oriSofm, ii, ofs);
00361     LINFO("oriWinner %ix%i", oriWinner.i, oriWinner.j);
00362 
00363     Point2D<int> colRGWinner = processRGColMap(inputImg, rgSofm, ii, ofs);
00364     LINFO("colRGWinner %ix%i", colRGWinner.i, colRGWinner.j);
00365 
00366     Point2D<int> colBYWinner = processBYColMap(inputImg, bySofm, ii, ofs);
00367     LINFO("colBYWinner %ix%i", colBYWinner.i, colBYWinner.j);
00368 
00369     Point2D<int> sceneWinner = processSceneMap(oriWinner, colRGWinner, colBYWinner,
00370         sceneSofm, ii, ofs);
00371 
00372 
00373     ii++;
00374 
00375     getchar();
00376 
00377   }
00378 
00379   // stop all our ModelComponents
00380   manager.stop();
00381 
00382   // all done!
00383   return 0;
00384 }
00385 
00386 // ######################################################################
00387 /* So things look consistent in everyone's emacs... */
00388 /* Local Variables: */
00389 /* indent-tabs-mode: nil */
00390 /* End: */