00001 /*!@file INVT/openvision.C version of ezvision.C that uses on-file color 00002 filters */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00006 // University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/openvision.C $ 00036 // $Id: openvision.C 10845 2009-02-13 08:49:12Z itti $ 00037 // 00038 00039 #include "Channels/RGBConvolveChannel.H" 00040 #include "Component/ModelManager.H" 00041 #include "Image/ColorOps.H" 00042 #include "Image/MathOps.H" 00043 #include "Image/MatrixOps.H" 00044 #include "Image/Pixels.H" 00045 #include "Image/Transforms.H" 00046 #include "Media/FrameSeries.H" 00047 #include "Neuro/NeuroOpts.H" 00048 #include "Channels/RawVisualCortex.H" 00049 #include "Raster/Raster.H" 00050 #include "Util/sformat.H" 00051 00052 #include <fstream> 00053 00054 #define NB_FILTERS 3 00055 #define NB_COEFFS 8 00056 00057 int main(const int argc, const char **argv) 00058 { 00059 int n = NB_COEFFS; 00060 int m = NB_COEFFS * NB_COEFFS * NB_FILTERS * 3; 00061 00062 MYLOGVERB = LOG_INFO; // Suppress debug messages 00063 00064 // Generate the haar transform matrix: 00065 Image<float> hmat(n, n, ZEROS); 00066 for(int i = 0; i < n; i++) 00067 { 00068 hmat.setVal(i, 0, 1.0f); 00069 } 00070 for(int i = 0; i < n / 2; i++) 00071 { 00072 hmat.setVal(i, 1, 1.0f); 00073 hmat.setVal(i + n / 2, 1, -1.0f); 00074 if (i - 2 < 0) 00075 { 00076 hmat.setVal(i, 2, 1.0f); 00077 hmat.setVal(i + 2, 2, -1.0f); 00078 } 00079 else 00080 { 00081 hmat.setVal(i + 2, 3, 1.0f); 00082 hmat.setVal(i + 4, 3, -1.0f); 00083 } 00084 hmat.setVal(2 * i, i + n / 2, 1.0f); 00085 hmat.setVal(2 * i + 1, i + n / 2, -1.0f); 00086 } 00087 00088 // Instantiate a ModelManager: 00089 ModelManager manager("Open Attention Model"); 00090 00091 // Instantiate our various ModelComponents: 00092 nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager)); 00093 manager.addSubComponent(vcx); 00094 00095 // let's make one dummy RGBConvolveChannel so that we get the 00096 // command-line options for it: 00097 nub::soft_ref<RGBConvolveChannel> channel(new RGBConvolveChannel(manager)); 00098 vcx->addSubChan(channel); 00099 00100 // Parse command-line: 00101 if (manager.parseCommandLine(argc, argv, "<data.txt> <image.ppm>", 00102 2, -1) == false) 00103 return(1); 00104 00105 // Ok, get rid of our placeholder channel; the manager will keep a 00106 // trace of its configured options: 00107 vcx->removeAllSubChans(); 00108 00109 // Get the input image name: 00110 char framename[1024]; 00111 strncpy(framename, manager.getExtraArg(1).c_str(), 1023); 00112 00113 // Load data: 00114 float data[m]; 00115 char dataname[1024]; 00116 strncpy(dataname, manager.getExtraArg(0).c_str(), 1023); 00117 std::ifstream inputfile (dataname); 00118 if (inputfile.is_open()) 00119 { 00120 for (int j = 0; j < m; j++) 00121 inputfile >> data[j]; 00122 inputfile.close(); 00123 } 00124 else 00125 { 00126 LERROR("*** Cannot open input file !"); 00127 return 1; 00128 } 00129 00130 // Convert data into filters: 00131 ImageSet<float> trans(NB_FILTERS * 3); 00132 for (int i = 0; i < NB_FILTERS * 3; i++) 00133 trans[i] = Image<float>(data + (n * n * i), n, n); 00134 ImageSet<float> filter(NB_FILTERS * 3); 00135 Dims filterdim(8, 8); 00136 for (int i = 0; i < NB_FILTERS * 3; i++) 00137 filter[i] = scaleBlock(matrixMult(transpose(hmat), 00138 matrixMult(trans[i], hmat)), 00139 filterdim); 00140 00141 for (int i = 0; i < NB_FILTERS; i++) 00142 { 00143 Image<float> rf = filter[3 * i]; 00144 Image<float> gf = filter[3 * i + 1]; 00145 Image<float> bf = filter[3 * i + 2]; 00146 float min, max, fmax; 00147 getMinMax(rf, min, max); 00148 if (fabs(min) > fabs(max)) 00149 fmax = min; 00150 else 00151 fmax = max; 00152 getMinMax(gf, min, max); 00153 if (fabs(min) > fmax) 00154 fmax = min; 00155 if (fabs(max) > fmax) 00156 fmax = max; 00157 getMinMax(bf, min, max); 00158 if (fabs(min) > fmax) 00159 fmax = min; 00160 if (fabs(max) > fmax) 00161 fmax = max; 00162 if (fmax < 1.0e-10F) 00163 fmax = 1; // images are uniform 00164 float scale = 128.0F / fmax; 00165 Image<float>::iterator rptr = rf.beginw(); 00166 Image<float>::iterator gptr = gf.beginw(); 00167 Image<float>::iterator bptr = bf.beginw(); 00168 Image<float>::iterator stop = rf.endw(); 00169 while (rptr != stop) 00170 { 00171 *rptr = (float)(float(*rptr) * scale); 00172 *gptr = (float)(float(*gptr) * scale); 00173 *bptr = (float)(float(*bptr) * scale); 00174 ++rptr; 00175 ++gptr; 00176 ++bptr; 00177 } 00178 Image< PixRGB<byte> > color_filter = makeRGB(rf + 128.0F, 00179 gf + 128.0F, 00180 bf + 128.0F); 00181 Raster::WriteRGB(color_filter, sformat("filter%i.ppm", i)); 00182 } 00183 00184 for (int i = 0; i < NB_FILTERS; i++) 00185 { 00186 // Create a channel attached to each filter: 00187 nub::soft_ref<RGBConvolveChannel> channel(new RGBConvolveChannel(manager)); 00188 00189 channel->setDescriptiveName(sformat("RGBConvolve%d", i)); 00190 channel->setTagName(sformat("rgbconv%d", i)); 00191 00192 channel->exportOptions(MC_RECURSE); // Get our configs 00193 00194 // const char *filtername = manager.getExtraArg(i).c_str(); 00195 // FILE *f = fopen(filtername, "r"); 00196 // if (f == NULL) LFATAL("Cannot open %s", filtername); 00197 00198 // // Scan the filter file to get the 3 kernels: 00199 // int w, h; 00200 // if (fscanf(f, "%d %d\n", &w, &h) != 2) 00201 // LFATAL("Bogus first line in %s", filtername); 00202 // LINFO("Building %dx%d RGB kernel from '%s'", w, h, filtername); 00203 // Image<float> rker(w, h, NO_INIT); 00204 // for (int j = 0; j < h; j ++) 00205 // for (int i = 0; i < w; i ++) { 00206 // float coeff; 00207 // if (fscanf(f, "%f\n", &coeff) != 1) 00208 // LFATAL("Bogus coeff in %s at red (%d, %d)", 00209 // filtername, i, j); 00210 // rker.setVal(i, j, coeff); 00211 // } 00212 // Image<float> gker(w, h, NO_INIT); 00213 // for (int j = 0; j < h; j ++) 00214 // for (int i = 0; i < w; i ++) { 00215 // float coeff; 00216 // if (fscanf(f, "%f\n", &coeff) != 1) 00217 // LFATAL("Bogus coeff in %s at green (%d, %d)", 00218 // filtername, i, j); 00219 // gker.setVal(i, j, coeff); 00220 // } 00221 // Image<float> bker(w, h, NO_INIT); 00222 // for (int j = 0; j < h; j ++) 00223 // for (int i = 0; i < w; i ++) { 00224 // float coeff; 00225 // if (fscanf(f, "%f\n", &coeff) != 1) 00226 // LFATAL("Bogus coeff in %s at blue (%d, %d)", 00227 // filtername, i, j); 00228 // bker.setVal(i, j, coeff); 00229 // } 00230 00231 // Assign the 3 filters to the channel: 00232 channel->setFilters(filter[3 * i], filter[3 * i + 1], 00233 filter[3 * i + 2], 00234 CONV_BOUNDARY_ZERO); 00235 00236 // Attach the channel to our visual cortex: 00237 vcx->addSubChan(channel); 00238 } 00239 00240 // Let's get all our ModelComponent instances started: 00241 manager.start(); 00242 00243 // #################################################################### 00244 // Main processing: 00245 00246 // Read the input image: 00247 LINFO("*** Loading image %s", framename); 00248 Image< PixRGB<byte> > picture = Raster::ReadRGB(framename, RASFMT_PNM); 00249 00250 // Process the image through the visual cortex: 00251 vcx->input(InputFrame::fromRgb(&picture)); 00252 00253 // Get the resulting saliency map: 00254 Image<float> sm = vcx->getOutput(); 00255 00256 // Normalize the saliency map: 00257 inplaceNormalize(sm, 0.0f, 255.0f); 00258 Image<byte> smb = sm; 00259 00260 // Save the normalized saliency map: 00261 int i = strlen(framename) - 1; while(i > 0 && framename[i] != '.') i--; 00262 framename[i] = '\0'; // Remove input file extension 00263 LINFO("*** Saving '%s-SM.pgm'...", framename); 00264 Raster::WriteGray(smb, sformat("%s-SM.pgm", framename)); 00265 00266 // // Chamfer the binary mask, rescale it to the saliency map's size, 00267 // // and invert it: 00268 // Dims dim = smb.getDims(); 00269 // Image<byte> blur_mask = binaryReverse(chamfer34(mask, (byte) 255), 00270 // (byte) 255); 00271 // inplaceLowThresh(blur_mask, (byte) 200); 00272 // Image<byte> mask_in = scaleBlock(blur_mask, dim); 00273 // Image<byte> mask_out = binaryReverse(mask_in, (byte) 255); 00274 00275 // // Weight the saliency map using the in and out masks: 00276 // Image<float> smb_in = (mask_in * (1.0f / 255.0f)) * smb; 00277 // Image<float> smb_out = (mask_out * (1.0f / 255.0f)) * smb; 00278 00279 // // Get the max_in and max_out values: 00280 // float max_in, max_out, min; 00281 // getMinMax(smb_in, min, max_in); 00282 // getMinMax(smb_out, min, max_out); 00283 00284 // // Compute the error: 00285 // float detect_coeff = 1.0f - ((max_in - max_out) / 255.0f); 00286 // float error_val = detect_coeff * detect_coeff; 00287 // // Display the error value: 00288 // std::cout << error_val << std::endl; 00289 00290 // Save the result: 00291 // LINFO("*** Saving 'max_in_out.txt'...", max_in, max_out); 00292 // FILE *result = fopen("max_in_out.txt", "w"); 00293 //fprintf(result, "%f %f", max_in, max_out); 00294 00295 // Stop all our ModelComponents 00296 manager.stop(); 00297 00298 // Convolve the picture with the filters and save the results 00299 for (int i = 0; i < NB_FILTERS; i++) 00300 { 00301 RGBConvolvePyrBuilder<float> rgbcpb(filter[3 * i], 00302 filter[3 * i + 1], 00303 filter[3 * i + 2], 00304 CONV_BOUNDARY_ZERO); 00305 ImageSet< PixRGB<float> > rgbpyr = rgbcpb.build2(picture, 0, 4); 00306 for (int j = 0; j < 4; j++) 00307 { 00308 Image<float> rc, gc, bc; 00309 getComponents(rgbpyr[j], rc, gc, bc); 00310 float min, max, fmax; 00311 getMinMax(rc, min, max); 00312 if (fabs(min) > fabs(max)) 00313 fmax = min; 00314 else 00315 fmax = max; 00316 getMinMax(gc, min, max); 00317 if (fabs(min) > fmax) 00318 fmax = min; 00319 if (fabs(max) > fmax) 00320 fmax = max; 00321 getMinMax(bc, min, max); 00322 if (fabs(min) > fmax) 00323 fmax = min; 00324 if (fabs(max) > fmax) 00325 fmax = max; 00326 if (fmax < 1.0e-10F) 00327 fmax = 1; // images are uniform 00328 float scale = 255.0F / fmax; 00329 Image<float>::iterator rptr = rc.beginw(); 00330 Image<float>::iterator gptr = gc.beginw(); 00331 Image<float>::iterator bptr = bc.beginw(); 00332 Image<float>::iterator stop = rc.endw(); 00333 while (rptr != stop) 00334 { 00335 *rptr = (float)(float(*rptr) * scale); 00336 *gptr = (float)(float(*gptr) * scale); 00337 *bptr = (float)(float(*bptr) * scale); 00338 ++rptr; 00339 ++gptr; 00340 ++bptr; 00341 } 00342 Image<float> prc, nrc, pgc, ngc, pbc, nbc; 00343 splitPosNeg(rc, prc, nrc); 00344 splitPosNeg(gc, pgc, ngc); 00345 splitPosNeg(bc, pbc, nbc); 00346 rgbpyr[j] = makeRGB((rc / 2.0F) + 128.0F, 00347 (gc / 2.0F) + 128.0F, 00348 (bc / 2.0F) + 128.0F); 00349 Image< PixRGB<byte> > rgbpyrb = rgbpyr[j]; 00350 Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i.ppm", 00351 framename, i, j)); 00352 rgbpyr[j] = makeRGB(prc, pgc, pbc); 00353 rgbpyrb = rgbpyr[j]; 00354 Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i-pos.ppm", 00355 framename, i, j)); 00356 rgbpyr[j] = makeRGB(nrc, ngc, nbc); 00357 rgbpyrb = rgbpyr[j]; 00358 Raster::WriteRGB(rgbpyrb, sformat("%s/conv-f%i-l%i-neg.ppm", 00359 framename, i, j)); 00360 } 00361 } 00362 00363 // All done! 00364 return 0; 00365 } 00366 00367 // ###################################################################### 00368 /* So things look consistent in everyone's emacs... */ 00369 /* Local Variables: */ 00370 /* indent-tabs-mode: nil */ 00371 /* End: */