/*!@file INVT/openvision3.C version of ezvision.C that uses on-file gray filters */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.
// 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/openvision3.C $ 00036 // $Id: openvision3.C 10845 2009-02-13 08:49:12Z itti $ 00037 // 00038 00039 #include "Channels/ChannelOpts.H" 00040 #include "Channels/ConvolveChannel.H" 00041 #include "Component/ModelManager.H" 00042 #include "Image/ColorOps.H" 00043 #include "Image/DrawOps.H" 00044 #include "Image/MathOps.H" 00045 #include "Image/MatrixOps.H" 00046 #include "Image/Pixels.H" 00047 #include "Image/ShapeOps.H" 00048 #include "Media/FrameSeries.H" 00049 #include "Channels/RawVisualCortex.H" 00050 #include "Raster/Raster.H" 00051 #include "Util/sformat.H" 00052 00053 #include <fstream> 00054 00055 #define NB_FILTERS 4 00056 #define NB_COEFFS 8 00057 #define RAD 64 00058 #define NB_CONV 1 00059 00060 int main(const int argc, const char **argv) 00061 { 00062 int n = NB_COEFFS; 00063 int m = NB_COEFFS * NB_COEFFS * NB_FILTERS; 00064 00065 MYLOGVERB = LOG_INFO; // Suppress debug messages 00066 00067 // Generate the haar transform matrix: 00068 Image<float> hmat(n, n, ZEROS); 00069 for(int i = 0; i < n; i++) 00070 hmat.setVal(i, 0, 1.0f); 00071 for(int i = 0; i < n / 2; i++) 00072 { 00073 hmat.setVal(i, 1, 1.0f); 00074 hmat.setVal(i + n / 2, 1, -1.0f); 00075 if (i - 2 < 0) 00076 { 00077 hmat.setVal(i, 2, 1.0f); 00078 hmat.setVal(i + 2, 2, -1.0f); 00079 } 00080 else 00081 { 00082 hmat.setVal(i + 2, 3, 1.0f); 00083 hmat.setVal(i + 4, 3, -1.0f); 00084 } 00085 hmat.setVal(2 * i, i + n / 2, 1.0f); 00086 hmat.setVal(2 * i + 1, i + n / 2, -1.0f); 00087 } 
00088 00089 // Instantiate a ModelManager: 00090 ModelManager manager("Open Attention Model"); 00091 00092 // Instantiate our various ModelComponents: 00093 nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager)); 00094 manager.addSubComponent(vcx); 00095 manager.setOptionValString(&OPT_MaxNormType, "Maxnorm"); 00096 00097 // let's make one dummy RGBConvolveChannel so that we get the 00098 // command-line options for it: 00099 nub::soft_ref<ConvolveChannel> cchannel(new ConvolveChannel(manager)); 00100 vcx->addSubChan(cchannel); 00101 00102 // Parse command-line: 00103 if (manager.parseCommandLine(argc, argv, "<data.txt> <image.ppm>", 00104 2, -1) == false) 00105 return(1); 00106 00107 // Ok, get rid of our placeholder channel; the manager will keep a 00108 // trace of its configured options: 00109 vcx->removeAllSubChans(); 00110 00111 // Get the input image name: 00112 const std::string framename = manager.getExtraArg(1); 00113 00114 // Load Scanpaths: 00115 Point2D<int> ch[20]; 00116 Point2D<int> kp[20]; 00117 Point2D<int> pw[20]; 00118 Point2D<int> wkm[20]; 00119 int nch, nkp, npw, nwkm; 00120 int count = 0; 00121 std::ifstream chfile(sformat("%s.ch.dat", framename.c_str()).c_str()); 00122 bool eofile = false; 00123 while (!chfile.eof()) 00124 { 00125 int px, py; 00126 chfile >> px >> py; 00127 if (!chfile.eof()) 00128 { 00129 ch[count].i = px; 00130 ch[count].j = py; 00131 count++; 00132 } 00133 else 00134 eofile = true; 00135 } 00136 chfile.close(); 00137 nch = count; 00138 count = 0; 00139 std::ifstream kpfile(sformat("%s.kp.dat", framename.c_str()).c_str()); 00140 eofile = false; 00141 while (!eofile) 00142 { 00143 int px, py; 00144 kpfile >> px >>py; 00145 if (!kpfile.eof()) 00146 { 00147 kp[count].i = px; 00148 kp[count].j = py; 00149 count++; 00150 } 00151 else 00152 eofile = true; 00153 } 00154 kpfile.close(); 00155 nkp = count; 00156 count = 0; 00157 std::ifstream pwfile(sformat("%s.pw.dat", framename.c_str()).c_str()); 00158 eofile = false; 
00159 while (!eofile) 00160 { 00161 int px, py; 00162 pwfile >> px >> py; 00163 if (!pwfile.eof()) 00164 { 00165 pw[count].i = px; 00166 pw[count].j = py; 00167 count++; 00168 } 00169 else 00170 eofile = true; 00171 } 00172 pwfile.close(); 00173 npw = count; 00174 count = 0; 00175 std::ifstream wkmfile(sformat("%s.wkm.dat", framename.c_str()).c_str()); 00176 eofile = false; 00177 while (!eofile) 00178 { 00179 int px, py; 00180 wkmfile >> px >> py; 00181 if (!wkmfile.eof()) 00182 { 00183 wkm[count].i = px; 00184 wkm[count].j = py; 00185 count++; 00186 } 00187 else 00188 eofile = true; 00189 } 00190 wkmfile.close(); 00191 nwkm = count; 00192 00193 // Load data: 00194 float data[m]; 00195 char dataname[1024]; 00196 strncpy(dataname, manager.getExtraArg(0).c_str(), 1023); 00197 std::ifstream inputfile (dataname); 00198 if (inputfile.is_open()) 00199 { 00200 for (int j = 0; j < m; j++) 00201 inputfile >> data[j]; 00202 inputfile.close(); 00203 } 00204 else 00205 { 00206 LERROR("*** Cannot open input file !"); 00207 return 1; 00208 } 00209 00210 // Convert data into filters: 00211 ImageSet<float> trans(NB_FILTERS); 00212 for (int i = 0; i < NB_FILTERS; i++) 00213 trans[i] = Image<float>(data + (n * n * i), n, n); 00214 ImageSet<float> filter(NB_FILTERS); 00215 for (int i = 0; i < NB_FILTERS; i++) 00216 filter[i] = matrixMult(transpose(hmat), 00217 matrixMult(trans[i], hmat)); 00218 00219 // Compute the dot product of the filters: 00220 int nb_prod = (NB_FILTERS * (NB_FILTERS - 1)) / 2; 00221 ImageSet<float> prod(nb_prod); 00222 int k = 0; 00223 for (int j = 0; j < NB_FILTERS - 1; j++) 00224 for (int i = j + 1; i < NB_FILTERS; i++) 00225 { 00226 prod[k] = filter[j] * filter[i]; 00227 k++; 00228 } 00229 float dotprod = 0.0f; 00230 for (int i = 0; i < nb_prod; i++) 00231 dotprod += sum(prod[i]); 00232 00233 // Inject filterx in Visual Cortex: 00234 for (int i = 0; i < NB_FILTERS; i++) 00235 { 00236 // Create a channel attached to each filter: 00237 
nub::soft_ref<ConvolveChannel> channel(new ConvolveChannel(manager)); 00238 00239 channel->setDescriptiveName(sformat("Convolve%d", i)); 00240 channel->setTagName(sformat("conv%d", i)); 00241 00242 channel->exportOptions(MC_RECURSE); // Get our configs 00243 00244 // Assign the 3 filters to the channel: 00245 channel->setFilter(filter[i], CONV_BOUNDARY_ZERO); 00246 00247 // Attach the channel to our visual cortex: 00248 vcx->addSubChan(channel); 00249 } 00250 00251 // Let's get all our ModelComponent instances started: 00252 manager.start(); 00253 00254 // #################################################################### 00255 // Main processing: 00256 00257 // Read the input image: 00258 LINFO("*** Loading image %s", framename.c_str()); 00259 Image< PixRGB<byte> > picture = 00260 Raster::ReadRGB(sformat("%s.ppm", framename.c_str())); 00261 00262 // Process the image through the visual cortex: 00263 vcx->input(InputFrame::fromRgb(&picture)); 00264 00265 // Get the resulting saliency map: 00266 Image<float> sm = vcx->getOutput(); 00267 00268 // Normalize the saliency map: 00269 inplaceNormalize(sm, 0.0f, 255.0f); 00270 Image<byte> smb = sm; 00271 00272 // Save the normalized saliency map: 00273 Raster::WriteGray(smb, sformat("%s-SM.pgm", framename.c_str())); 00274 00275 // Rescale saliency map: 00276 Dims dim = picture.getDims(); 00277 Image< PixRGB<byte> > smr = rescale(smb, dim); 00278 00279 // Get the average saliency: 00280 double avgsal = mean(sm); 00281 00282 // Get the map level to scale things down: 00283 const LevelSpec lspec = vcx->getModelParamVal<LevelSpec>("LevelSpec"); 00284 int sml = lspec.mapLevel(); 00285 00286 // Scale the radius 00287 int radius = RAD >> sml; 00288 00289 // Get the saliency on each end of saccade and draw circles on big sm: 00290 float chsal = 0; 00291 int sc = 1 << sml; 00292 PixRGB<byte> red(200, 0, 0); 00293 PixRGB<byte> green(0, 200, 0); 00294 PixRGB<byte> blue(0, 0, 200); 00295 PixRGB<byte> yellow(200, 200, 0); 00296 for 
(int i = 0; i < nch; i++) 00297 { 00298 chsal += getLocalMax(sm, ch[i] / sc, radius); 00299 drawCircle(smr, ch[i], RAD, red); 00300 } 00301 chsal /= nch; 00302 float kpsal = 0; 00303 for (int i = 0; i < nkp; i++) 00304 { 00305 kpsal += getLocalMax(sm, kp[i] / sc, radius); 00306 drawCircle(smr, kp[i], RAD, green); 00307 } 00308 kpsal /= nkp; 00309 float pwsal = 0; 00310 for (int i = 0; i < npw; i++) 00311 { 00312 pwsal += getLocalMax(sm, pw[i] / sc, radius); 00313 drawCircle(smr, pw[i], RAD, blue); 00314 } 00315 pwsal /= npw; 00316 float wkmsal = 0; 00317 for (int i = 0; i < nwkm; i++) 00318 { 00319 wkmsal += getLocalMax(sm, wkm[i] / sc, radius); 00320 drawCircle(smr, wkm[i], RAD, yellow); 00321 } 00322 wkmsal /= nwkm; 00323 00324 float goodsal = (chsal + kpsal + pwsal + wkmsal) / 4; 00325 00326 // Save saliency map with circles: 00327 Raster::WriteRGB(smr, sformat("%s-SM_circles.ppm", framename.c_str())); 00328 00329 // Compute the error: 00330 float error = (1 + avgsal) / (1 + goodsal); 00331 float result = error + 0.00001f * fabs(dotprod); 00332 00333 // Save results: 00334 const std::string resname = sformat("%s-score.txt", framename.c_str()); 00335 std::ofstream resultfile(resname.c_str()); 00336 resultfile << "*** " << framename << " ***\n"; 00337 resultfile << "Filters product : " << fabs(dotprod) << "\n"; 00338 resultfile << "Saliency score : " << error << "\n"; 00339 resultfile << "Total score : " << result << "\n"; 00340 resultfile.close(); 00341 00342 { 00343 float fmax = 0.0f; 00344 for (int i = 0; i < NB_FILTERS; i++) 00345 { 00346 float min, max; 00347 getMinMax(filter[i], min, max); 00348 if (fabs(min) > fmax) 00349 fmax = min; 00350 if (fabs(max) > fmax) 00351 fmax = max; 00352 } 00353 if (fmax < 1.0e-10F) 00354 fmax = 1; // images are uniform 00355 float scale = 128.0F / fmax; 00356 for (int i = 0; i < NB_FILTERS; i++) 00357 { 00358 Image<float>::iterator fptr = filter[i].beginw(); 00359 Image<float>::iterator stop = filter[i].endw(); 00360 while 
(fptr != stop) 00361 { 00362 *fptr = (float)(float(*fptr) * scale); 00363 ++fptr; 00364 } 00365 Image<byte> filterb = filter[i] + 128.0f; 00366 Raster::WriteGray(filterb, sformat("filter%i.pgm", i)); 00367 } 00368 } 00369 00370 // Stop all our ModelComponents 00371 manager.stop(); 00372 00373 // Convolve the picture with the filters and save the results 00374 for (int i = 0; i < NB_FILTERS; i++) 00375 { 00376 ConvolvePyrBuilder<float> pyrb(filter[i], CONV_BOUNDARY_ZERO); 00377 Image<float> pictureg = luminance(picture); 00378 ImageSet<float> pyr = pyrb.build(pictureg, 0, NB_CONV); 00379 for (int j = 0; j < NB_CONV; j++) 00380 { 00381 float min, max, fmax; 00382 getMinMax(pyr[j], min, max); 00383 if (fabs(min) > fabs(max)) 00384 fmax = min; 00385 else 00386 fmax = max; 00387 if (fmax < 1.0e-10F) 00388 fmax = 1; // convolution is uniform 00389 float scale = 128.0F / fmax; 00390 Image<float>::iterator pyrptr = pyr[j].beginw(); 00391 Image<float>::iterator stop = pyr[j].endw(); 00392 while (pyrptr != stop) 00393 { 00394 *pyrptr = (float)(float(*pyrptr) * scale); 00395 ++pyrptr; 00396 } 00397 Image<byte> conv = pyr[j] + 128.0f; 00398 Raster::WriteGray(conv, 00399 sformat("%s_conv%i_filt%i.pgm", 00400 framename.c_str(), j, i)); 00401 } 00402 } 00403 // All done! 00404 return 0; 00405 } 00406 00407 // ###################################################################### 00408 /* So things look consistent in everyone's emacs... */ 00409 /* Local Variables: */ 00410 /* indent-tabs-mode: nil */ 00411 /* End: */