00001 /*!@file TIGS/PyramidFeatureExtractor.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TIGS/PyramidFeatureExtractor.C $ 00035 // $Id: PyramidFeatureExtractor.C 12546 2010-01-12 15:46:00Z sychic $ 00036 // 00037 00038 #ifndef TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED 00039 #define TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED 00040 00041 #include "TIGS/PyramidFeatureExtractor.H" 00042 00043 #include "Image/CutPaste.H" 00044 #include "Image/ImageSet.H" 00045 #include "Image/PyramidOps.H" 00046 #include "Image/ShapeOps.H" // for downSize() 00047 #include "TIGS/Drawing.H" 00048 #include "TIGS/TigsOpts.H" 00049 #include "Transport/FrameOstream.H" 00050 #include "Util/sformat.H" 00051 #include "rutz/trace.h" 00052 00053 namespace 00054 { 00055 float* insertLocalAvg(const Image<float>& img, 00056 float* p, float* const stop, 00057 const double factor) 00058 { 00059 Image<float> ds = downSize(img, 4, 4, 3); 00060 00061 for (int i = 0; i < ds.getSize() && p < stop; ++i, ++p) 00062 *p = factor * ds[i]; 00063 00064 return p; 00065 } 00066 00067 float* insertLocalMax(const Image<float>& img, 00068 float* p, float* const stop, 00069 const double factor) 00070 { 00071 00072 if (p+16 > stop) 00073 return p; 00074 00075 for (int y = 0; y < img.getHeight(); ++y) 00076 for (int x = 0; x < img.getWidth(); ++x) 00077 { 00078 const float val = factor * img[Point2D<int>(x,y)]; 00079 const int pos = 00080 (y*4 / img.getHeight()) * 4 + 00081 (x*4 / img.getWidth()); 00082 00083 p[pos] = std::max(p[pos], val); 00084 } 00085 00086 return p+16; 00087 } 00088 00089 float* insertLocalVar(const Image<float>& img, 00090 float* p, float* const stop, 00091 const double factor) 00092 { 00093 00094 if (p+16 > stop) 00095 return p; 00096 00097 float ss[16] = { 0.0f }, ssq[16] = { 0.0f }; 00098 int N[16] = { 0 }; 00099 00100 for (int y = 0; y < img.getHeight(); ++y) 00101 for (int x = 0; x < img.getWidth(); ++x) 00102 { 00103 const double val = img[Point2D<int>(x,y)]; 00104 const int pos = 00105 (y*4 / img.getHeight()) * 4 + 00106 (x*4 / img.getWidth()); 00107 00108 ssq[pos] += val*val; 00109 ss[pos] += val; 00110 ++N[pos]; 00111 } 00112 00113 for (int i = 0; i < 16; ++i) 00114 { 00115 if (N[i] > 1 && (ss[i]/N[i]) > 0.0f) 00116 { 00117 const float numer1 = (ssq[i] - (ss[i]*ss[i]/N[i])); 00118 const float denom1 = (N[i]-1); 00119 00120 if (numer1 > 0.0f && denom1 > 0.0f) 00121 // coefficient of variation = 100*stdev/mean 00122 p[i] = factor*100.0f*sqrt(numer1/denom1)/(ss[i]/N[i]); 00123 } 00124 else 00125 { 00126 p[i] = 0.0f; 00127 } 00128 } 00129 00130 return p+16; 00131 } 00132 00133 float* insertPyrFeatures(const ImageSet<float>& pyr, 00134 float* p, float* const stop, 00135 const double factor) 00136 { 00137 p = insertLocalAvg(pyr[2], p, stop, factor); 00138 p = insertLocalVar(pyr[2], p, stop, 1.5); 00139 p = insertLocalAvg(pyr[5], p, stop, factor); 00140 p = insertLocalVar(pyr[5], p, stop, 1.5); 00141 00142 return p; 00143 } 00144 00145 Image<PixRGB<byte> > illustrate1(const ImageSet<float>& pyr, 00146 const double factor, 00147 const char* name, 00148 const PixRGB<byte>& bg) 00149 { 00150 using tigs::labelImage; 00151 using tigs::boxify; 00152 00153 const PixRGB<byte> red(255, 64, 64); 00154 const PixRGB<byte> green(96, 192, 96); 00155 const PixRGB<byte> blue(128, 128, 255); 00156 const PixRGB<byte> yellow(160, 160, 0); 00157 00158 const Image<PixRGB<byte> > top = // 256x256 00159 labelImage(boxify(pyr[1] * float(factor), 8, green), 00160 name, green, bg); 00161 00162 const Image<PixRGB<byte> > mid = // 256x128 00163 concatX(labelImage(boxify(pyr[2] * float(factor), 4, yellow), "fine", yellow, bg), 00164 labelImage(boxify(zoomXY(pyr[5] * float(factor), 8, 8), 4, yellow), "coarse", yellow, bg)); 00165 00166 Image<float> avg2(4,4,NO_INIT); 00167 Image<float> var2(4,4,NO_INIT); 00168 Image<float> avg5(4,4,NO_INIT); 00169 Image<float> var5(4,4,NO_INIT); 00170 00171 insertLocalAvg(pyr[2], &avg2[0], &avg2[0]+16, factor); 00172 insertLocalVar(pyr[2], &var2[0], &var2[0]+16, 1.5); 00173 insertLocalAvg(pyr[5], &avg5[0], &avg5[0]+16, factor); 00174 insertLocalVar(pyr[5], &var5[0], &var5[0]+16, 1.5); 00175 00176 Image<PixRGB<byte> > cavg2 = avg2; 00177 Image<PixRGB<byte> > cvar2 = var2; 00178 Image<PixRGB<byte> > cavg5 = avg5; 00179 Image<PixRGB<byte> > cvar5 = var5; 00180 00181 const Image<PixRGB<byte> > low2 = 00182 concatX(labelImage(boxify(zoomXY(cavg2, 16, 16), 2, red), "mean", red, bg), 00183 labelImage(boxify(zoomXY(cvar2, 16, 16), 2, blue), "var", blue, bg)); 00184 00185 const Image<PixRGB<byte> > low5 = 00186 concatX(labelImage(boxify(zoomXY(cavg5, 16, 16), 2, red), "mean", red, bg), 00187 labelImage(boxify(zoomXY(cvar5, 16, 16), 2, blue), "var", blue, bg)); 00188 00189 const Image<PixRGB<byte> > low = concatX(low2, low5); 00190 00191 return concatY(concatY(top, mid), low); 00192 } 00193 00194 void saveRaw1(const ImageSet<float>& pyr, 00195 const double factor, 00196 const char* name, 00197 FrameOstream& ofs) 00198 { 00199 // ofs.writeFloat(pyr[0], FLOAT_NORM_PRESERVE, 00200 // (sformat("%s-base", name))); 00201 00202 // ofs.writeFloat(pyr[2], FLOAT_NORM_PRESERVE, 00203 // (sformat("%s-fine", name))); 00204 00205 // ofs.writeFloat(pyr[5], FLOAT_NORM_PRESERVE, 00206 // (sformat("%s-coarse", name))); 00207 00208 Image<float> avg2(4,4,NO_INIT); 00209 Image<float> var2(4,4,NO_INIT); 00210 Image<float> avg5(4,4,NO_INIT); 00211 Image<float> var5(4,4,NO_INIT); 00212 00213 insertLocalAvg(pyr[2], &avg2[0], &avg2[0]+16, factor); 00214 insertLocalVar(pyr[2], &var2[0], &var2[0]+16, 1.5); 00215 insertLocalAvg(pyr[5], &avg5[0], &avg5[0]+16, factor); 00216 insertLocalVar(pyr[5], &var5[0], &var5[0]+16, 1.5); 00217 00218 ofs.writeFloat(avg2, FLOAT_NORM_PRESERVE, 00219 (sformat("%s-fine-avg", name))); 00220 00221 ofs.writeFloat(var2, FLOAT_NORM_PRESERVE, 00222 (sformat("%s-fine-var", name))); 00223 00224 ofs.writeFloat(avg5, FLOAT_NORM_PRESERVE, 00225 (sformat("%s-coarse-avg", name))); 00226 00227 ofs.writeFloat(var5, FLOAT_NORM_PRESERVE, 00228 (sformat("%s-coarse-var", name))); 00229 } 00230 } 00231 00232 PyramidFeatureExtractor::PyramidFeatureExtractor(OptionManager& mgr) : 00233 FeatureExtractor(mgr, "pfx"), 00234 itsSaveIllustrations(&OPT_FxSaveIllustrations, this), 00235 itsSaveRawMaps(&OPT_FxSaveRawMaps, this) 00236 {} 00237 00238 PyramidFeatureExtractor::~PyramidFeatureExtractor() {} 00239 00240 Image<PixRGB<byte> > PyramidFeatureExtractor:: 00241 illustrate(const TigsInputFrame& fin) const 00242 { 00243 if (fin.isGhost()) 00244 LFATAL("PyramidFeatureExtractor needs non-ghost frames"); 00245 00246 const PixRGB<byte> bg(255, 255, 255); 00247 00248 Image<PixRGB<byte> > top = 00249 illustrate1(buildPyrGaussian(fin.lum(), 0, 10, 9), 1.0, "luminance", bg); 00250 00251 top = concatX(top, 00252 illustrate1(buildPyrGaussian(fin.rg(), 0, 10, 9), 1.0, "red/green", bg)); 00253 00254 top = concatX(top, 00255 illustrate1(buildPyrGaussian(fin.by(), 0, 10, 9), 1.0, "blue/yellow", bg)); 00256 00257 const double angles[] = { 0.0, 45.0, 90.0, 135.0 }; 00258 00259 Image<PixRGB<byte> > bottom; 00260 00261 for (int i = 0; i < 4; i += 2) 00262 { 00263 Image<PixRGB<byte> > x = 00264 illustrate1(buildPyrOriented(fin.lum(), 0, 10, 9, 00265 angles[i], 15.0), 00266 5.0, 00267 sformat("%d degrees", int(angles[i])).c_str(), bg); 00268 00269 if (bottom.initialized()) 00270 bottom = concatX(bottom, x); 00271 else 00272 bottom = x; 00273 } 00274 00275 Image<PixRGB<byte> > result = concatX(top, bottom); 00276 return rescale(result, 00277 int(0.75*result.getWidth()), 00278 int(0.75*result.getHeight())); 00279 } 00280 00281 void PyramidFeatureExtractor:: 00282 saveRawIllustrationParts(const TigsInputFrame& fin, 00283 FrameOstream& ofs) const 00284 { 00285 saveRaw1(buildPyrGaussian(fin.lum(), 0, 10, 9), 1.0, "luminance", ofs); 00286 00287 saveRaw1(buildPyrGaussian(fin.rg(), 0, 10, 9), 1.0, "red-green", ofs); 00288 00289 saveRaw1(buildPyrGaussian(fin.by(), 0, 10, 9), 1.0, "blue-yellow", ofs); 00290 00291 const double angles[] = { 0.0, 45.0, 90.0, 135.0 }; 00292 00293 for (int i = 0; i < 4; ++i) 00294 { 00295 saveRaw1(buildPyrOriented(fin.lum(), 0, 10, 9, 00296 angles[i], 15.0), 00297 5.0, 00298 sformat("ori%d", int(angles[i])).c_str(), 00299 ofs); 00300 } 00301 } 00302 00303 void PyramidFeatureExtractor::saveResults(const TigsInputFrame& fin, 00304 FrameOstream& ofs) const 00305 { 00306 if (itsSaveIllustrations.getVal()) 00307 ofs.writeRGB(this->illustrate(fin), "pfx"); 00308 00309 if (itsSaveRawMaps.getVal()) 00310 this->saveRawIllustrationParts(fin, ofs); 00311 } 00312 00313 Image<float> PyramidFeatureExtractor::doExtract(const TigsInputFrame& fin) 00314 { 00315 GVX_TRACE(__PRETTY_FUNCTION__); 00316 00317 if (fin.isGhost()) 00318 LFATAL("PyramidFeatureExtractor needs non-ghost frames"); 00319 00320 Image<float> result(4, 112, ZEROS); 00321 00322 float* p = result.getArrayPtr(); 00323 float* const stop = p + result.getSize(); 00324 00325 { 00326 GVX_TRACE("PyramidFeatureExtractor::extract-laplacian"); 00327 00328 p = insertPyrFeatures(buildPyrGaussian(fin.lum(), 0, 10, 9), 00329 p, stop, 1.0); 00330 00331 p = insertPyrFeatures(buildPyrGaussian(fin.rg(), 0, 10, 9), 00332 p, stop, 1.0); 00333 00334 p = insertPyrFeatures(buildPyrGaussian(fin.by(), 0, 10, 9), 00335 p, stop, 1.0); 00336 } 00337 00338 const double angles[] = { 0.0, 45.0, 90.0, 135.0 }; 00339 00340 for (int i = 0; i < 4; ++i) 00341 { 00342 GVX_TRACE("PyramidFeatureExtractor::extract-oriented"); 00343 00344 p = insertPyrFeatures(buildPyrOriented(fin.lum(), 0, 10, 9, 00345 angles[i], 15.0), 00346 p, stop, 15.0); 00347 } 00348 00349 return result; 00350 } 00351 00352 // ###################################################################### 00353 /* So things look consistent in everyone's emacs... */ 00354 /* Local Variables: */ 00355 /* mode: c++ */ 00356 /* indent-tabs-mode: nil */ 00357 /* End: */ 00358 00359 #endif // TIGS_PYRAMIDFEATUREEXTRACTOR_C_DEFINED