openvision.C

Go to the documentation of this file.
00001 /*!@file INVT/openvision.C  version of ezvision.C that uses on-file color
00002   filters */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/openvision.C $
00036 // $Id: openvision.C 10845 2009-02-13 08:49:12Z itti $
00037 //
00038 
00039 #include "Channels/RGBConvolveChannel.H"
00040 #include "Component/ModelManager.H"
00041 #include "Image/ColorOps.H"
00042 #include "Image/MathOps.H"
00043 #include "Image/MatrixOps.H"
00044 #include "Image/Pixels.H"
00045 #include "Image/Transforms.H"
00046 #include "Media/FrameSeries.H"
00047 #include "Neuro/NeuroOpts.H"
00048 #include "Channels/RawVisualCortex.H"
00049 #include "Raster/Raster.H"
00050 #include "Util/sformat.H"
00051 
00052 #include <fstream>
00053 
00054 #define NB_FILTERS  3
00055 #define NB_COEFFS 8
00056 
00057 int main(const int argc, const char **argv)
00058 {
00059   int n = NB_COEFFS;
00060   int m = NB_COEFFS * NB_COEFFS * NB_FILTERS * 3;
00061 
00062   MYLOGVERB = LOG_INFO;  // Suppress debug messages
00063 
00064   // Generate the haar transform matrix:
00065   Image<float> hmat(n, n, ZEROS);
00066   for(int i = 0; i < n; i++)
00067     {
00068       hmat.setVal(i, 0, 1.0f);
00069     }
00070   for(int i = 0; i < n / 2; i++)
00071     {
00072       hmat.setVal(i, 1, 1.0f);
00073       hmat.setVal(i + n / 2, 1, -1.0f);
00074       if (i - 2 < 0)
00075         {
00076           hmat.setVal(i, 2, 1.0f);
00077           hmat.setVal(i + 2, 2, -1.0f);
00078         }
00079       else
00080         {
00081           hmat.setVal(i + 2, 3, 1.0f);
00082           hmat.setVal(i + 4, 3, -1.0f);
00083         }
00084       hmat.setVal(2 * i, i + n / 2, 1.0f);
00085       hmat.setVal(2 * i + 1, i + n / 2, -1.0f);
00086     }
00087 
00088   // Instantiate a ModelManager:
00089   ModelManager manager("Open Attention Model");
00090 
00091   // Instantiate our various ModelComponents:
00092   nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager));
00093   manager.addSubComponent(vcx);
00094 
00095   // let's make one dummy RGBConvolveChannel so that we get the
00096   // command-line options for it:
00097   nub::soft_ref<RGBConvolveChannel> channel(new RGBConvolveChannel(manager));
00098   vcx->addSubChan(channel);
00099 
00100   // Parse command-line:
00101   if (manager.parseCommandLine(argc, argv, "<data.txt> <image.ppm>",
00102                                2, -1) == false)
00103     return(1);
00104 
00105   // Ok, get rid of our placeholder channel; the manager will keep a
00106   // trace of its configured options:
00107   vcx->removeAllSubChans();
00108 
00109   // Get the input image name:
00110   char framename[1024];
00111   strncpy(framename, manager.getExtraArg(1).c_str(), 1023);
00112 
00113   // Load data:
00114   float data[m];
00115   char dataname[1024];
00116   strncpy(dataname, manager.getExtraArg(0).c_str(), 1023);
00117   std::ifstream inputfile (dataname);
00118   if (inputfile.is_open())
00119     {
00120       for (int j = 0; j < m; j++)
00121         inputfile >> data[j];
00122       inputfile.close();
00123     }
00124   else
00125     {
00126       LERROR("*** Cannot open input file !");
00127       return 1;
00128     }
00129 
00130   // Convert data into filters:
00131   ImageSet<float> trans(NB_FILTERS * 3);
00132   for (int i = 0; i < NB_FILTERS * 3; i++)
00133     trans[i] = Image<float>(data + (n * n * i), n, n);
00134   ImageSet<float> filter(NB_FILTERS * 3);
00135   Dims filterdim(8, 8);
00136   for (int i = 0; i < NB_FILTERS * 3; i++)
00137     filter[i] = scaleBlock(matrixMult(transpose(hmat),
00138                                       matrixMult(trans[i], hmat)),
00139                            filterdim);
00140 
00141   for (int i = 0; i < NB_FILTERS; i++)
00142     {
00143       Image<float> rf = filter[3 * i];
00144       Image<float> gf = filter[3 * i + 1];
00145       Image<float> bf = filter[3 * i + 2];
00146       float min, max, fmax;
00147       getMinMax(rf, min, max);
00148       if (fabs(min) > fabs(max))
00149         fmax = min;
00150       else
00151         fmax = max;
00152       getMinMax(gf, min, max);
00153       if (fabs(min) > fmax)
00154         fmax = min;
00155       if (fabs(max) > fmax)
00156         fmax = max;
00157       getMinMax(bf, min, max);
00158       if (fabs(min) > fmax)
00159         fmax = min;
00160       if (fabs(max) > fmax)
00161         fmax = max;
00162       if (fmax < 1.0e-10F)
00163         fmax = 1; // images are uniform
00164       float scale = 128.0F / fmax;
00165       Image<float>::iterator rptr = rf.beginw();
00166       Image<float>::iterator gptr = gf.beginw();
00167       Image<float>::iterator bptr = bf.beginw();
00168       Image<float>::iterator stop = rf.endw();
00169       while (rptr != stop)
00170         {
00171           *rptr = (float)(float(*rptr) * scale);
00172           *gptr = (float)(float(*gptr) * scale);
00173           *bptr = (float)(float(*bptr) * scale);
00174           ++rptr;
00175           ++gptr;
00176           ++bptr;
00177         }
00178       Image< PixRGB<byte> > color_filter = makeRGB(rf + 128.0F,
00179                                                    gf + 128.0F,
00180                                                    bf + 128.0F);
00181       Raster::WriteRGB(color_filter, sformat("filter%i.ppm", i));
00182     }
00183 
00184   for (int i = 0; i < NB_FILTERS; i++)
00185     {
00186       // Create a channel attached to each filter:
00187       nub::soft_ref<RGBConvolveChannel> channel(new RGBConvolveChannel(manager));
00188 
00189       channel->setDescriptiveName(sformat("RGBConvolve%d", i));
00190       channel->setTagName(sformat("rgbconv%d", i));
00191 
00192       channel->exportOptions(MC_RECURSE);  // Get our configs
00193 
00194 //       const char *filtername = manager.getExtraArg(i).c_str();
00195 //       FILE *f = fopen(filtername, "r");
00196 //       if (f == NULL) LFATAL("Cannot open %s", filtername);
00197 
00198 //       // Scan the filter file to get the 3 kernels:
00199 //       int w, h;
00200 //       if (fscanf(f, "%d %d\n", &w, &h) != 2)
00201 //         LFATAL("Bogus first line in %s", filtername);
00202 //       LINFO("Building %dx%d RGB kernel from '%s'", w, h, filtername);
00203 //       Image<float> rker(w, h, NO_INIT);
00204 //       for (int j = 0; j < h; j ++)
00205 //         for (int i = 0; i < w; i ++) {
00206 //           float coeff;
00207 //           if (fscanf(f, "%f\n", &coeff) != 1)
00208 //             LFATAL("Bogus coeff in %s at red (%d, %d)",
00209 //                    filtername, i, j);
00210 //           rker.setVal(i, j, coeff);
00211 //         }
00212 //       Image<float> gker(w, h, NO_INIT);
00213 //       for (int j = 0; j < h; j ++)
00214 //         for (int i = 0; i < w; i ++) {
00215 //           float coeff;
00216 //           if (fscanf(f, "%f\n", &coeff) != 1)
00217 //             LFATAL("Bogus coeff in %s at green (%d, %d)",
00218 //                    filtername, i, j);
00219 //           gker.setVal(i, j, coeff);
00220 //         }
00221 //       Image<float> bker(w, h, NO_INIT);
00222 //       for (int j = 0; j < h; j ++)
00223 //         for (int i = 0; i < w; i ++) {
00224 //           float coeff;
00225 //           if (fscanf(f, "%f\n", &coeff) != 1)
00226 //             LFATAL("Bogus coeff in %s at blue (%d, %d)",
00227 //                    filtername, i, j);
00228 //           bker.setVal(i, j, coeff);
00229 //         }
00230 
00231       // Assign the 3 filters to the channel:
00232       channel->setFilters(filter[3 * i], filter[3 * i + 1],
00233                           filter[3 * i + 2],
00234                           CONV_BOUNDARY_ZERO);
00235 
00236       // Attach the channel to our visual cortex:
00237       vcx->addSubChan(channel);
00238     }
00239 
00240   // Let's get all our ModelComponent instances started:
00241   manager.start();
00242 
00243   // ####################################################################
00244   // Main processing:
00245 
00246   // Read the input image:
00247   LINFO("*** Loading image %s", framename);
00248   Image< PixRGB<byte> > picture = Raster::ReadRGB(framename, RASFMT_PNM);
00249 
00250   // Process the image through the visual cortex:
00251   vcx->input(InputFrame::fromRgb(&picture));
00252 
00253   // Get the resulting saliency map:
00254   Image<float> sm = vcx->getOutput();
00255 
00256   // Normalize the saliency map:
00257   inplaceNormalize(sm, 0.0f, 255.0f);
00258   Image<byte> smb = sm;
00259 
00260   // Save the normalized saliency map:
00261   int i = strlen(framename) - 1; while(i > 0 && framename[i] != '.') i--;
00262   framename[i] = '\0'; // Remove input file extension
00263   LINFO("*** Saving '%s-SM.pgm'...", framename);
00264   Raster::WriteGray(smb, sformat("%s-SM.pgm", framename));
00265 
00266 //   // Chamfer the binary mask, rescale it to the saliency map's size,
00267 //   // and invert it:
00268 //   Dims dim = smb.getDims();
00269 //   Image<byte> blur_mask = binaryReverse(chamfer34(mask, (byte) 255),
00270 //                                         (byte) 255);
00271 //   inplaceLowThresh(blur_mask, (byte) 200);
00272 //   Image<byte> mask_in = scaleBlock(blur_mask, dim);
00273 //   Image<byte> mask_out = binaryReverse(mask_in, (byte) 255);
00274 
00275 //   // Weight the saliency map using the in and out masks:
00276 //   Image<float> smb_in = (mask_in * (1.0f / 255.0f)) * smb;
00277 //   Image<float> smb_out = (mask_out * (1.0f / 255.0f)) * smb;
00278 
00279 //   // Get the max_in and max_out values:
00280 //   float max_in, max_out, min;
00281 //   getMinMax(smb_in, min, max_in);
00282 //   getMinMax(smb_out, min, max_out);
00283 
00284 //   // Compute the error:
00285 //   float detect_coeff = 1.0f - ((max_in - max_out) / 255.0f);
00286 //   float error_val = detect_coeff * detect_coeff;
00287 //   // Display the error value:
00288 //   std::cout << error_val << std::endl;
00289 
00290   // Save the result:
00291   // LINFO("*** Saving 'max_in_out.txt'...", max_in, max_out);
00292   // FILE *result = fopen("max_in_out.txt", "w");
00293   //fprintf(result, "%f %f", max_in, max_out);
00294 
00295   // Stop all our ModelComponents
00296   manager.stop();
00297 
00298   // Convolve the picture with the filters and save the results
00299   for (int i = 0; i < NB_FILTERS; i++)
00300     {
00301       RGBConvolvePyrBuilder<float> rgbcpb(filter[3 * i],
00302                                           filter[3 * i + 1],
00303                                           filter[3 * i + 2],
00304                                           CONV_BOUNDARY_ZERO);
00305       ImageSet< PixRGB<float> > rgbpyr = rgbcpb.build2(picture, 0, 4);
00306       for (int j = 0; j < 4; j++)
00307         {
00308           Image<float> rc, gc, bc;
00309           getComponents(rgbpyr[j], rc, gc, bc);
00310           float min, max, fmax;
00311           getMinMax(rc, min, max);
00312           if (fabs(min) > fabs(max))
00313             fmax = min;
00314           else
00315             fmax = max;
00316           getMinMax(gc, min, max);
00317           if (fabs(min) > fmax)
00318             fmax = min;
00319           if (fabs(max) > fmax)
00320             fmax = max;
00321           getMinMax(bc, min, max);
00322           if (fabs(min) > fmax)
00323             fmax = min;
00324           if (fabs(max) > fmax)
00325             fmax = max;
00326           if (fmax < 1.0e-10F)
00327             fmax = 1; // images are uniform
00328           float scale = 255.0F / fmax;
00329           Image<float>::iterator rptr = rc.beginw();
00330           Image<float>::iterator gptr = gc.beginw();
00331           Image<float>::iterator bptr = bc.beginw();
00332           Image<float>::iterator stop = rc.endw();
00333           while (rptr != stop)
00334             {
00335               *rptr = (float)(float(*rptr) * scale);
00336               *gptr = (float)(float(*gptr) * scale);
00337               *bptr = (float)(float(*bptr) * scale);
00338               ++rptr;
00339               ++gptr;
00340               ++bptr;
00341             }
00342           Image<float> prc, nrc, pgc, ngc, pbc, nbc;
00343           splitPosNeg(rc, prc, nrc);
00344           splitPosNeg(gc, pgc, ngc);
00345           splitPosNeg(bc, pbc, nbc);
00346           rgbpyr[j] = makeRGB((rc / 2.0F) + 128.0F,
00347                               (gc / 2.0F) + 128.0F,
00348                               (bc / 2.0F) + 128.0F);
00349           Image< PixRGB<byte> > rgbpyrb = rgbpyr[j];
00350           Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i.ppm",
00351                                             framename, i, j));
00352           rgbpyr[j] = makeRGB(prc, pgc, pbc);
00353           rgbpyrb = rgbpyr[j];
00354           Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i-pos.ppm",
00355                                             framename, i, j));
00356           rgbpyr[j] = makeRGB(nrc, ngc, nbc);
00357           rgbpyrb = rgbpyr[j];
00358           Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i-neg.ppm",
00359                                             framename, i, j));
00360         }
00361     }
00362 
00363   // All done!
00364   return 0;
00365 }
00366 
00367 // ######################################################################
00368 /* So things look consistent in everyone's emacs... */
00369 /* Local Variables: */
00370 /* indent-tabs-mode: nil */
00371 /* End: */