/*!@file INVT/openvision3.C version of ezvision.C that uses on-file gray filters */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.
// 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/openvision3.C $ 00036 // $Id: openvision3.C 10845 2009-02-13 08:49:12Z itti $ 00037 // 00038 00039 #include "Channels/ChannelOpts.H" 00040 #include "Channels/ConvolveChannel.H" 00041 #include "Component/ModelManager.H" 00042 #include "Image/ColorOps.H" 00043 #include "Image/DrawOps.H" 00044 #include "Image/MathOps.H" 00045 #include "Image/MatrixOps.H" 00046 #include "Image/Pixels.H" 00047 #include "Image/ShapeOps.H" 00048 #include "Media/FrameSeries.H" 00049 #include "Channels/RawVisualCortex.H" 00050 #include "Raster/Raster.H" 00051 #include "Util/sformat.H" 00052 00053 #include <fstream> 00054 00055 #define NB_FILTERS 4 00056 #define NB_COEFFS 8 00057 #define RAD 64 00058 #define NB_CONV 1 00059 00060 int main(const int argc, const char **argv) 00061 { 00062 int n = NB_COEFFS; 00063 int m = NB_COEFFS * NB_COEFFS * NB_FILTERS; 00064 00065 MYLOGVERB = LOG_INFO; // Suppress debug messages 00066 00067 // Generate the haar transform matrix: 00068 Image<float> hmat(n, n, ZEROS); 00069 for(int i = 0; i < n; i++) 00070 hmat.setVal(i, 0, 1.0f); 00071 for(int i = 0; i < n / 2; i++) 00072 { 00073 hmat.setVal(i, 1, 1.0f); 00074 hmat.setVal(i + n / 2, 1, -1.0f); 00075 if (i - 2 < 0) 00076 { 00077 hmat.setVal(i, 2, 1.0f); 00078 hmat.setVal(i + 2, 2, -1.0f); 00079 } 00080 else 00081 { 00082 hmat.setVal(i + 2, 3, 1.0f); 00083 hmat.setVal(i + 4, 3, -1.0f); 00084 } 00085 hmat.setVal(2 * i, i + n / 2, 1.0f); 00086 hmat.setVal(2 * i + 1, i + n / 2, -1.0f); 00087 } 
00088 00089 // Instantiate a ModelManager: 00090 ModelManager manager("Open Attention Model"); 00091 00092 // Instantiate our various ModelComponents: 00093 nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager)); 00094 manager.addSubComponent(vcx); 00095 manager.setOptionValString(&OPT_MaxNormType, "Maxnorm"); 00096 00097 // let's make one dummy RGBConvolveChannel so that we get the 00098 // command-line options for it: 00099 nub::soft_ref<ConvolveChannel> cchannel(new ConvolveChannel(manager)); 00100 vcx->addSubChan(cchannel); 00101 00102 // Parse command-line: 00103 if (manager.parseCommandLine(argc, argv, "<data.txt> <image.ppm>", 00104 2, -1) == false) 00105 return(1); 00106 00107 // Ok, get rid of our placeholder channel; the manager will keep a 00108 // trace of its configured options: 00109 vcx->removeAllSubChans(); 00110 00111 // Get the input image name: 00112 const std::string framename = manager.getExtraArg(1); 00113 00114 // Load Scanpaths: 00115 Point2D<int> ch[20]; 00116 Point2D<int> kp[20]; 00117 Point2D<int> pw[20]; 00118 Point2D<int> wkm[20]; 00119 int nch, nkp, npw, nwkm; 00120 int count = 0; 00121 std::ifstream chfile(sformat("%s.ch.dat", framename.c_str()).c_str()); 00122 bool eofile = false; 00123 while (!chfile.eof()) 00124 { 00125 int px, py; 00126 chfile >> px >> py; 00127 if (!chfile.eof()) 00128 { 00129 ch[count].i = px; 00130 ch[count].j = py; 00131 count++; 00132 } 00133 else 00134 eofile = true; 00135 } 00136 chfile.close(); 00137 nch = count; 00138 count = 0; 00139 std::ifstream kpfile(sformat("%s.kp.dat", framename.c_str()).c_str()); 00140 eofile = false; 00141 while (!eofile) 00142 { 00143 int px, py; 00144 kpfile >> px >>py; 00145 if (!kpfile.eof()) 00146 { 00147 kp[count].i = px; 00148 kp[count].j = py; 00149 count++; 00150 } 00151 else 00152 eofile = true; 00153 } 00154 kpfile.close(); 00155 nkp = count; 00156 count = 0; 00157 std::ifstream pwfile(sformat("%s.pw.dat", framename.c_str()).c_str()); 00158 eofile = false; 
00159 while (!eofile) 00160 { 00161 int px, py; 00162 pwfile >> px >> py; 00163 if (!pwfile.eof()) 00164 { 00165 pw[count].i = px; 00166 pw[count].j = py; 00167 count++; 00168 } 00169 else 00170 eofile = true; 00171 } 00172 pwfile.close(); 00173 npw = count; 00174 count = 0; 00175 std::ifstream wkmfile(sformat("%s.wkm.dat", framename.c_str()).c_str()); 00176 eofile = false; 00177 while (!eofile) 00178 { 00179 int px, py; 00180 wkmfile >> px >> py; 00181 if (!wkmfile.eof()) 00182 { 00183 wkm[count].i = px; 00184 wkm[count].j = py; 00185 count++; 00186 } 00187 else 00188 eofile = true; 00189 } 00190 wkmfile.close(); 00191 nwkm = count; 00192 00193 // Load data: 00194 float data[m]; 00195 char dataname[1024]; 00196 strncpy(dataname, manager.getExtraArg(0).c_str(), 1023); 00197 std::ifstream inputfile (dataname); 00198 if (inputfile.is_open()) 00199 { 00200 for (int j = 0; j < m; j++) 00201 inputfile >> data[j]; 00202 inputfile.close(); 00203 } 00204 else 00205 { 00206 LERROR("*** Cannot open input file !"); 00207 return 1; 00208 } 00209 00210 // Convert data into filters: 00211 ImageSet<float> trans(NB_FILTERS); 00212 for (int i = 0; i < NB_FILTERS; i++) 00213 trans[i] = Image<float>(data + (n * n * i), n, n); 00214 ImageSet<float> filter(NB_FILTERS); 00215 for (int i = 0; i < NB_FILTERS; i++) 00216 filter[i] = matrixMult(transpose(hmat), 00217 matrixMult(trans[i], hmat)); 00218 00219 // Compute the dot product of the filters: 00220 int nb_prod = (NB_FILTERS * (NB_FILTERS - 1)) / 2; 00221 ImageSet<float> prod(nb_prod); 00222 int k = 0; 00223 for (int j = 0; j < NB_FILTERS - 1; j++) 00224 for (int i = j + 1; i < NB_FILTERS; i++) 00225 { 00226 prod[k] = filter[j] * filter[i]; 00227 k++; 00228 } 00229 float dotprod = 0.0f; 00230 for (int i = 0; i < nb_prod; i++) 00231 dotprod += sum(prod[i]); 00232 00233 // Inject filterx in Visual Cortex: 00234 for (int i = 0; i < NB_FILTERS; i++) 00235 { 00236 // Create a channel attached to each filter: 00237 
nub::soft_ref<ConvolveChannel> channel(new ConvolveChannel(manager)); 00238 00239 channel->setDescriptiveName(sformat("Convolve%d", i)); 00240 channel->setTagName(sformat("conv%d", i)); 00241 00242 channel->exportOptions(MC_RECURSE); // Get our configs 00243 00244 // Assign the 3 filters to the channel: 00245 channel->setFilter(filter[i], CONV_BOUNDARY_ZERO); 00246 00247 // Attach the channel to our visual cortex: 00248 vcx->addSubChan(channel); 00249 } 00250 00251 // Let's get all our ModelComponent instances started: 00252 manager.start(); 00253 00254 // #################################################################### 00255 // Main processing: 00256 00257 // Read the input image: 00258 LINFO("*** Loading image %s", framename.c_str()); 00259 Image< PixRGB<byte> > picture = 00260 Raster::ReadRGB(sformat("%s.ppm", framename.c_str())); 00261 00262 // Process the image through the visual cortex: 00263 vcx->input(InputFrame::fromRgb(&picture)); 00264 00265 // Get the resulting saliency map: 00266 Image<float> sm = vcx->getOutput(); 00267 00268 // Normalize the saliency map: 00269 inplaceNormalize(sm, 0.0f, 255.0f); 00270 Image<byte> smb = sm; 00271 00272 // Save the normalized saliency map: 00273 Raster::WriteGray(smb, sformat("%s-SM.pgm", framename.c_str())); 00274 00275 // Rescale saliency map: 00276 Dims dim = picture.getDims(); 00277 Image< PixRGB<byte> > smr = rescale(smb, dim); 00278 00279 // Get the average saliency: 00280 double avgsal = mean(sm); 00281 00282 // Get the map level to scale things down: 00283 const LevelSpec lspec = vcx->getModelParamVal<LevelSpec>("LevelSpec"); 00284 int sml = lspec.mapLevel(); 00285 00286 // Scale the radius 00287 int radius = RAD >> sml; 00288 00289 // Get the saliency on each end of saccade and draw circles on big sm: 00290 float chsal = 0; 00291 int sc = 1 << sml; 00292 PixRGB<byte> red(200, 0, 0); 00293 PixRGB<byte> green(0, 200, 0); 00294 PixRGB<byte> blue(0, 0, 200); 00295 PixRGB<byte> yellow(200, 200, 0); 00296 for 
(int i = 0; i < nch; i++) 00297 { 00298 chsal += getLocalMax(sm, ch[i] / sc, radius); 00299 drawCircle(smr, ch[i], RAD, red); 00300 } 00301 chsal /= nch; 00302 float kpsal = 0; 00303 for (int i = 0; i < nkp; i++) 00304 { 00305 kpsal += getLocalMax(sm, kp[i] / sc, radius); 00306 drawCircle(smr, kp[i], RAD, green); 00307 } 00308 kpsal /= nkp; 00309 float pwsal = 0; 00310 for (int i = 0; i < npw; i++) 00311 { 00312 pwsal += getLocalMax(sm, pw[i] / sc, radius); 00313 drawCircle(smr, pw[i], RAD, blue); 00314 } 00315 pwsal /= npw; 00316 float wkmsal = 0; 00317 for (int i = 0; i < nwkm; i++) 00318 { 00319 wkmsal += getLocalMax(sm, wkm[i] / sc, radius); 00320 drawCircle(smr, wkm[i], RAD, yellow); 00321 } 00322 wkmsal /= nwkm; 00323 00324 float goodsal = (chsal + kpsal + pwsal + wkmsal) / 4; 00325 00326 // Save saliency map with circles: 00327 Raster::WriteRGB(smr, sformat("%s-SM_circles.ppm", framename.c_str())); 00328 00329 // Compute the error: 00330 float error = (1 + avgsal) / (1 + goodsal); 00331 float result = error + 0.00001f * fabs(dotprod); 00332 00333 // Save results: 00334 const std::string resname = sformat("%s-score.txt", framename.c_str()); 00335 std::ofstream resultfile(resname.c_str()); 00336 resultfile << "*** " << framename << " ***\n"; 00337 resultfile << "Filters product : " << fabs(dotprod) << "\n"; 00338 resultfile << "Saliency score : " << error << "\n"; 00339 resultfile << "Total score : " << result << "\n"; 00340 resultfile.close(); 00341 00342 { 00343 float fmax = 0.0f; 00344 for (int i = 0; i < NB_FILTERS; i++) 00345 { 00346 float min, max; 00347 getMinMax(filter[i], min, max); 00348 if (fabs(min) > fmax) 00349 fmax = min; 00350 if (fabs(max) > fmax) 00351 fmax = max; 00352 } 00353 if (fmax < 1.0e-10F) 00354 fmax = 1; // images are uniform 00355 float scale = 128.0F / fmax; 00356 for (int i = 0; i < NB_FILTERS; i++) 00357 { 00358 Image<float>::iterator fptr = filter[i].beginw(); 00359 Image<float>::iterator stop = filter[i].endw(); 00360 while 
(fptr != stop) 00361 { 00362 *fptr = (float)(float(*fptr) * scale); 00363 ++fptr; 00364 } 00365 Image<byte> filterb = filter[i] + 128.0f; 00366 Raster::WriteGray(filterb, sformat("filter%i.pgm", i)); 00367 } 00368 } 00369 00370 // Stop all our ModelComponents 00371 manager.stop(); 00372 00373 // Convolve the picture with the filters and save the results 00374 for (int i = 0; i < NB_FILTERS; i++) 00375 { 00376 ConvolvePyrBuilder<float> pyrb(filter[i], CONV_BOUNDARY_ZERO); 00377 Image<float> pictureg = luminance(picture); 00378 ImageSet<float> pyr = pyrb.build(pictureg, 0, NB_CONV); 00379 for (int j = 0; j < NB_CONV; j++) 00380 { 00381 float min, max, fmax; 00382 getMinMax(pyr[j], min, max); 00383 if (fabs(min) > fabs(max)) 00384 fmax = min; 00385 else 00386 fmax = max; 00387 if (fmax < 1.0e-10F) 00388 fmax = 1; // convolution is uniform 00389 float scale = 128.0F / fmax; 00390 Image<float>::iterator pyrptr = pyr[j].beginw(); 00391 Image<float>::iterator stop = pyr[j].endw(); 00392 while (pyrptr != stop) 00393 { 00394 *pyrptr = (float)(float(*pyrptr) * scale); 00395 ++pyrptr; 00396 } 00397 Image<byte> conv = pyr[j] + 128.0f; 00398 Raster::WriteGray(conv, 00399 sformat("%s_conv%i_filt%i.pgm", 00400 framename.c_str(), j, i)); 00401 } 00402 } 00403 // All done! 00404 return 0; 00405 } 00406 00407 // ###################################################################### 00408 /* So things look consistent in everyone's emacs... */ 00409 /* Local Variables: */ 00410 /* indent-tabs-mode: nil */ 00411 /* End: */