/*!@file SceneUnderstanding/V1.C  */


// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
// 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/plugins/SceneUnderstanding/V1.C $ 00036 // $Id: V1.C 14350 2010-12-28 20:01:35Z lior $ 00037 // 00038 00039 #ifndef V1_C_DEFINED 00040 #define V1_C_DEFINED 00041 00042 #include "plugins/SceneUnderstanding/V1.H" 00043 00044 #include "Image/DrawOps.H" 00045 #include "Image/MathOps.H" 00046 //#include "Image/OpenCVUtil.H" 00047 #include "Image/Kernels.H" 00048 #include "Image/FilterOps.H" 00049 #include "Image/Convolutions.H" 00050 #include "Image/fancynorm.H" 00051 #include "Image/Point3D.H" 00052 #include "Simulation/SimEventQueue.H" 00053 #include "Neuro/EnvVisualCortex.H" 00054 #include "GUI/DebugWin.H" 00055 #include <math.h> 00056 #include <fcntl.h> 00057 #include <limits> 00058 #include <string> 00059 00060 const ModelOptionCateg MOC_V1 = { 00061 MOC_SORTPRI_3, "V1-Related Options" }; 00062 00063 // Used by: SimulationViewerEyeMvt 00064 const ModelOptionDef OPT_V1ShowDebug = 00065 { MODOPT_ARG(bool), "V1ShowDebug", &MOC_V1, OPTEXP_CORE, 00066 "Show debug img", 00067 "v1-debug", '\0', "<true|false>", "false" }; 00068 00069 00070 //Define the inst function name 00071 SIMMODULEINSTFUNC(V1); 00072 00073 // ###################################################################### 00074 V1::V1(OptionManager& mgr, const std::string& descrName, 00075 const std::string& tagName) : 00076 SimModule(mgr, descrName, tagName), 00077 SIMCALLBACK_INIT(SimEventLGNOutput), 00078 SIMCALLBACK_INIT(SimEventV1Bias), 00079 SIMCALLBACK_INIT(SimEventSaveOutput), 00080 
SIMCALLBACK_INIT(SimEventUserInput), 00081 itsShowDebug(&OPT_V1ShowDebug, this), 00082 itsThreshold(0.10), 00083 itsBiasThreshold(0.05), 00084 itsAngBias(0) 00085 00086 { 00087 itsAttenLoc.i = -1; 00088 itsAttenLoc.j = -1; 00089 00090 //itsAttenLoc.i = 892; 00091 //itsAttenLoc.j = 332; 00092 00093 //itsAttenLoc.i = 467; 00094 //itsAttenLoc.j = 27; 00095 00096 itsWinSize = Dims(320,240); 00097 00098 00099 00100 } 00101 00102 // ###################################################################### 00103 V1::~V1() 00104 { 00105 00106 } 00107 00108 // ###################################################################### 00109 void V1::onSimEventLGNOutput(SimEventQueue& q, 00110 rutz::shared_ptr<SimEventLGNOutput>& e) 00111 { 00112 itsTimer.reset(); 00113 itsLGNData = e->getCells(); 00114 00115 //Dims imgSize = itsLGNData[0].getDims(); 00116 //for(int y=0; y<imgSize.h(); y+=25) 00117 // for(int x=0; x<imgSize.w(); x+=25) 00118 // { 00119 // itsSpatialBias.push_back(SpatialBias(x,y,50,50, 0.10)); 00120 // } 00121 00122 evolve(q); 00123 00124 00125 00126 } 00127 00128 // ###################################################################### 00129 void V1::onSimEventSaveOutput(SimEventQueue& q, rutz::shared_ptr<SimEventSaveOutput>& e) 00130 { 00131 if (itsShowDebug.getVal()) 00132 { 00133 // get the OFS to save to, assuming sinfo is of type 00134 // SimModuleSaveInfo (will throw a fatal exception otherwise): 00135 nub::ref<FrameOstream> ofs = 00136 dynamic_cast<const SimModuleSaveInfo&>(e->sinfo()).ofs; 00137 Layout<PixRGB<byte> > disp = getDebugImage(); 00138 ofs->writeRgbLayout(disp, "V1", FrameInfo("V1", SRC_POS)); 00139 } 00140 } 00141 00142 00143 void V1::setBias(const Image<float> &biasImg) 00144 { 00145 00146 } 00147 00148 // ###################################################################### 00149 void V1::onSimEventV1Bias(SimEventQueue& q, 00150 rutz::shared_ptr<SimEventV1Bias>& e) 00151 { 00152 itsSpatialBias = e->getSpatialBias(); 00153 itsTimer.mark(); 
00154 printf("Total time %0.2f sec\n", itsTimer.real_secs()); 00155 fflush(stdout); 00156 00157 evolve(q); 00158 00159 //LINFO("Show V1"); 00160 //Layout<PixRGB<byte> > layout = getDebugImage(); 00161 //Image<PixRGB<byte> > tmp = layout.render(); 00162 //SHOWIMG(tmp); 00163 00164 } 00165 00166 void V1::onSimEventUserInput(SimEventQueue& q, rutz::shared_ptr<SimEventUserInput>& e) 00167 { 00168 LINFO("Got event %s %ix%i key=%i", 00169 e->getWinName(), 00170 e->getMouseClick().i, 00171 e->getMouseClick().j, 00172 e->getKey()); 00173 00174 00175 if (strcmp(e->getWinName(), "V1")) 00176 return; 00177 00178 switch(e->getKey()) 00179 { 00180 case 10: //1 00181 itsThreshold += 0.01; 00182 if (itsThreshold > 1) itsThreshold = 1; 00183 break; 00184 case 24: //q 00185 itsThreshold -= 0.01; 00186 if (itsThreshold < 0) itsThreshold = 0; 00187 break; 00188 case 11: //1 00189 itsBiasThreshold += 0.01; 00190 if (itsBiasThreshold > 1) itsBiasThreshold = 1; 00191 break; 00192 case 25: //q 00193 itsBiasThreshold -= 0.01; 00194 if (itsBiasThreshold < 0) itsBiasThreshold = 0; 00195 break; 00196 default: 00197 break; 00198 } 00199 00200 00201 if (e->getMouseClick().isValid()) 00202 { 00203 LINFO("Set spatial bias"); 00204 itsAttenLoc = e->getMouseClick(); 00205 //itsSpatialBias.loc = e->getMouseClick(); 00206 //itsSpatialBias.dims = Dims(50,50); 00207 //itsSpatialBias.threshold = itsBiasThreshold; 00208 } 00209 00210 evolve(q); 00211 00212 } 00213 00214 00215 // ###################################################################### 00216 void V1::evolve(SimEventQueue& q) 00217 { 00218 //evolveGabor(); 00219 // evolveSobel(); 00220 // evolveCanny(); 00221 evolveTensor(); 00222 //Layout<PixRGB<byte> > layout = getDebugImage(); 00223 //Image<PixRGB<byte> > tmp = layout.render(); 00224 //SHOWIMG(tmp); 00225 00226 q.post(rutz::make_shared(new SimEventV1Output(this, itsEdgesState))); 00227 } 00228 00229 void V1::evolveTensor() 00230 { 00231 Image<float> d1; 00232 Image<float> d2; 00233 
Image<float> d3; 00234 00235 if (itsAttenLoc.isValid()) 00236 { 00237 d1 = crop(itsLGNData[0], itsAttenLoc, itsWinSize); 00238 d2 = crop(itsLGNData[1], itsAttenLoc, itsWinSize); 00239 d3 = crop(itsLGNData[2], itsAttenLoc, itsWinSize); 00240 } else { 00241 d1 = itsLGNData[0]; 00242 d2 = itsLGNData[1]; 00243 d3 = itsLGNData[2]; 00244 } 00245 00246 00247 itsInput = d1; 00248 00249 00250 itsEdgesState.lumTensorField = getTensor(d1,3); 00251 itsEdgesState.rgTensorField = getTensor(d2,3); 00252 itsEdgesState.byTensorField = getTensor(d3,3); 00253 00254 //Non maximal supperssion 00255 nonMaxSurp(itsEdgesState.lumTensorField); 00256 nonMaxSurp(itsEdgesState.rgTensorField); 00257 nonMaxSurp(itsEdgesState.byTensorField); 00258 00259 00260 LINFO("Bias size %i", (int)itsSpatialBias.size()); 00261 ////Extract edges by keeping only the edges with values greater 00262 ////then 10% of the max mag. 00263 applyThreshold(itsEdgesState.lumTensorField, itsSpatialBias); 00264 applyThreshold(itsEdgesState.rgTensorField, itsSpatialBias); 00265 applyThreshold(itsEdgesState.byTensorField, itsSpatialBias); 00266 00267 } 00268 00269 void V1::applyThreshold(TensorField& tensorField, std::vector<SpatialBias>& spatialBias) 00270 { 00271 00272 Image<float> mag = getTensorMag(tensorField); 00273 float min, max; 00274 getMinMax(mag, min,max); 00275 00276 for(int y=0; y<mag.getHeight(); y++) 00277 for(int x=0; x<mag.getWidth(); x++) 00278 { 00279 bool biased = false; 00280 00281 for(uint i=0; i<spatialBias.size(); i++) 00282 { 00283 if (spatialBias[i].contains(x,y)) 00284 { 00285 if (mag.getVal(x,y) < max*spatialBias[i].threshold) 00286 tensorField.setVal(x,y,0); 00287 biased = true; 00288 } 00289 } 00290 00291 if (!biased) 00292 { 00293 if (mag.getVal(x,y) < max*itsThreshold) 00294 tensorField.setVal(x,y,0); 00295 } 00296 00297 } 00298 } 00299 00300 //// ###################################################################### 00301 //void V1::evolveSobel() 00302 //{ 00303 // 00304 // Image<float> 
//      magImg, oriImg;
//  //for(uint i=0; i<itsV1CellsInput.size(); i++)
//
//  for(uint i=0; i<1; i++)
//  {
//    gradientSobel(itsV1CellsInput[i], magImg, oriImg);
//    Image<float> edgeImg(magImg.getDims(), ZEROS);
//
//    itsEdgesState.clear();
//
//    for(int y=0; y<magImg.getHeight(); y++)
//      for(int x=0; x<magImg.getWidth(); x++)
//      {
//        float edgeProb = magImg.getVal(x,y)/200; //1.0F/(1.0F + expf(0.09*(30.0-magImg.getVal(x,y))));
//        if (edgeProb > 1.0) edgeProb = 1.0;
//        if (edgeProb > 0.0)
//        {
//          EdgeState edgeState;
//          edgeState.pos = Point2D<int>(x,y);
//          edgeState.ori = oriImg.getVal(x,y);
//          edgeState.var = (10*M_PI/180)*(10*M_PI/180); //10
//          edgeState.prob = edgeProb;
//
//          itsEdgesState.push_back(edgeState);
//        }
//
//        //Build the edgeDistance with a threshold
//        if (edgeProb > 0.25)
//          edgeImg.setVal(Point2D<int>(x,y), 1.0F);
//
//      }
//    //itsEdgesDT = chamfer34(edgeImg, 50.0F); //get the distance to edges max at 50pixels
//    itsEdgesDT = saliencyChamfer34(edgeImg); //get the distance to edges max at 50pixels
//    itsEdgesOri = oriImg;
//  }
//}
//
//void V1::evolveCanny()
//{
//  Image<float> magImg, oriImg;
//  //for(uint i=0; i<itsV1CellsInput.size(); i++)
//
//  for(uint i=0; i<1; i++)
//  {
//    gradientSobel(itsV1CellsInput[i], magImg, oriImg);
//    Image<float> edgeImg(magImg.getDims(), ZEROS);
//
//    inplaceNormalize(itsV1CellsInput[i], 0.0F, 255.0F);
//    Image<byte> in = itsV1CellsInput[i];
//    Image<byte> edges(in.getDims(), ZEROS);
//    cvCanny(img2ipl(in), img2ipl(edges), 50, 100);
//
//    itsEdgesState.clear();
//
//    for(int y=0; y<edges.getHeight(); y++)
//      for(int x=0; x<edges.getWidth(); x++)
//      {
//        if (edges.getVal(x,y) > 0)
//        {
//          float edgeProb = magImg.getVal(x,y)/200; //1.0F/(1.0F + expf(0.09*(30.0-magImg.getVal(x,y))));
//          if (edgeProb > 1.0) edgeProb = 1.0;
//          if (edgeProb > 0.0)
//          {
//            EdgeState edgeState;
//            edgeState.pos = Point2D<int>(x,y);
//            edgeState.ori = oriImg.getVal(x,y);
//            edgeState.var = (10*M_PI/180)*(10*M_PI/180); //10
//            edgeState.prob = edgeProb;
//
//            itsEdgesState.push_back(edgeState);
//          }
//
//          //Build the edgeDistance with a threshold
//          // if (edgeProb > 0.25)
//          edgeImg.setVal(Point2D<int>(x,y), 1.0F);
//        }
//
//      }
//    //itsEdgesDT = chamfer34(edgeImg, 50.0F); //get the distance to edges max at 50pixels
//    itsEdgesDT = saliencyChamfer34(edgeImg); //get the distance to edges max at 50pixels
//    itsEdgesOri = oriImg;
//  }
//}
//
//
//// ######################################################################
//void V1::evolveGabor()
//{
//
//  // float filter_period = 100;
//  // float elongation = 2.0;
//  // float angle = 90;
//  // int size = -1;
//  // const double major_stddev = filter_period / 30.0;
//  // const double minor_stddev = major_stddev * elongation;
//
//  //Image<float> gabor0 = gaborFilter3(major_stddev, minor_stddev,
//  //    filter_period, 90, 180 - angle + 90.0 , size);
//  //Image<float> gabor90 = gaborFilter3(major_stddev, minor_stddev,
//  //    filter_period, 0, 180 - angle + 90.0 , size);
//
//  Image<float> gabor0 = gaborFilter<float>(5.0F, //stdev
//                                 1.08, //period
//                                 0.0F, //phase
//                                 90.0F); //theta
////  Image<float> gabor90 = gaborFilter<float>(5.0F, //stdev
////                                 1.08, //period
////                                 0.0F, //phase
////                                 0.0F); //theta
////
//
//  // normalize to unit sum-of-squares:
//  gabor0 -= mean(gabor0); gabor0 /= sum(squared(gabor0));
//// gabor90 -= mean(gabor90); gabor90 /= sum(squared(gabor90)); 00418 // 00419 // Image<float> f0 = optConvolve(itsV1CellsInput[0], gabor0); 00420 // // SHOWIMG(f0); 00421 // //Image<float> f90 = optConvolve(itsV1CellsInput[0], gabor90); 00422 // //Image<float> out = sqrt(squared(f0) + squared(f90)); 00423 // 00424 // //SHOWIMG(f0); 00425 // //SHOWIMG(f90); 00426 // 00427 // //SHOWIMG(out); 00428 // Point2D<int> maxPos; float maxVal; 00429 // findMax(f0, maxPos, maxVal); 00430 // LINFO("Max at %i,%i %f", maxPos.i, maxPos.j, maxVal); 00431 // printf("%f;\n", maxVal); 00432 // fflush(stdout); 00433 // 00434 // //SHOWIMG(out); 00435 // 00436 //} 00437 00438 00439 Layout<PixRGB<byte> > V1::getDebugImage() 00440 { 00441 Layout<PixRGB<byte> > outDisp; 00442 00443 EigenSpace eigen = getTensorEigen(itsEdgesState.lumTensorField); 00444 Image<float> lumFeatures = eigen.l1-eigen.l2; 00445 00446 eigen = getTensorEigen(itsEdgesState.rgTensorField); 00447 Image<float> rgFeatures = eigen.l1-eigen.l2; 00448 00449 eigen = getTensorEigen(itsEdgesState.byTensorField); 00450 Image<float> byFeatures = eigen.l1-eigen.l2; 00451 00452 inplaceNormalize(lumFeatures, 0.0F, 255.0F); 00453 inplaceNormalize(rgFeatures, 0.0F, 255.0F); 00454 inplaceNormalize(byFeatures, 0.0F, 255.0F); 00455 00456 //SHOWIMG(lumFeatures); 00457 00458 Image<PixRGB<byte> > attnInput = itsInput; //itsLGNData[0]; 00459 00460 Image<PixRGB<byte> > input = itsLGNData[0]; 00461 if (itsAttenLoc.isValid()) 00462 { 00463 drawRect(input, Rectangle(itsAttenLoc, itsWinSize), PixRGB<byte>(0,255,0), 3); 00464 input = rescale(input, attnInput.getDims()); 00465 } 00466 00467 char msg[255]; 00468 sprintf(msg, "T: %0.2f BT: %0.2f", itsThreshold*100, itsBiasThreshold*100); 00469 writeText(attnInput, Point2D<int>(0,0), msg, 00470 PixRGB<byte>(255,255,255), 00471 PixRGB<byte>(0,0,0)); 00472 00473 for(uint i=0; i<itsSpatialBias.size(); i++) 00474 { 00475 Rectangle rect = Rectangle::centerDims(itsSpatialBias[i].loc, 
itsSpatialBias[i].dims); 00476 if (attnInput.rectangleOk(rect)) 00477 drawRect(attnInput,rect , 00478 PixRGB<byte>(255,0,0)); 00479 } 00480 00481 00482 00483 outDisp = hcat(input, attnInput); 00484 outDisp = hcat(outDisp, toRGB(Image<byte>(lumFeatures))); 00485 outDisp = hcat(outDisp, toRGB(Image<byte>(rgFeatures))); 00486 outDisp = hcat(outDisp, toRGB(Image<byte>(byFeatures))); 00487 00488 return outDisp; 00489 00490 } 00491 00492 // ###################################################################### 00493 /* So things look consistent in everyone's emacs... */ 00494 /* Local Variables: */ 00495 /* indent-tabs-mode: nil */ 00496 /* End: */ 00497 00498 #endif 00499