00001 /*!@file SceneUnderstanding/SMap.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/plugins/SceneUnderstanding/SMap.C $ 00035 // $Id: SMap.C 13551 2010-06-10 21:56:32Z itti $ 00036 // 00037 00038 #ifndef SMap_C_DEFINED 00039 #define SMap_C_DEFINED 00040 00041 #include "plugins/SceneUnderstanding/SMap.H" 00042 00043 #include "Image/DrawOps.H" 00044 #include "Image/MathOps.H" 00045 #include "Image/Kernels.H" 00046 #include "Image/FilterOps.H" 00047 #include "Image/Convolutions.H" 00048 #include "Image/fancynorm.H" 00049 #include "Image/Point3D.H" 00050 #include "Image/ColorOps.H" 00051 #include "Simulation/SimEventQueue.H" 00052 #include "Neuro/EnvVisualCortex.H" 00053 #include "GUI/DebugWin.H" 00054 #include "Util/MathFunctions.H" 00055 #include <math.h> 00056 #include <fcntl.h> 00057 #include <limits> 00058 #include <string> 00059 00060 const ModelOptionCateg MOC_SMap = { 00061 MOC_SORTPRI_3, "SMap-Related Options" }; 00062 00063 // Used by: SimulationViewerEyeMvt 00064 const ModelOptionDef OPT_SMapShowDebug = 00065 { MODOPT_ARG(bool), "SMapShowDebug", &MOC_SMap, OPTEXP_CORE, 00066 "Show debug img", 00067 "SMap-debug", '\0', "<true|false>", "false" }; 00068 00069 //Define the inst function name 00070 SIMMODULEINSTFUNC(SMap); 00071 00072 00073 // ###################################################################### 00074 SMap::SMap(OptionManager& mgr, const std::string& descrName, 00075 const std::string& tagName) : 00076 SimModule(mgr, descrName, tagName), 00077 SIMCALLBACK_INIT(SimEventInputFrame), 00078 SIMCALLBACK_INIT(SimEventSaveOutput), 00079 SIMCALLBACK_INIT(SimEventUserInput), 00080 itsShowDebug(&OPT_SMapShowDebug, this) 00081 00082 { 00083 00084 itsEvc = nub::soft_ref<EnvVisualCortex>(new EnvVisualCortex(mgr)); 00085 addSubComponent(itsEvc); 00086 00087 00088 } 00089 00090 // ###################################################################### 00091 SMap::~SMap() 00092 { 00093 00094 } 00095 00096 // ###################################################################### 00097 void SMap::onSimEventInputFrame(SimEventQueue& q, 00098 rutz::shared_ptr<SimEventInputFrame>& e) 00099 { 00100 itsSMapCellsInput = e->frame().asRgb(); 00101 evolve(); 00102 00103 q.post(rutz::make_shared(new SimEventSMapOutput(this, itsSMapState, itsSMap))); 00104 00105 } 00106 00107 // ###################################################################### 00108 void SMap::onSimEventSaveOutput(SimEventQueue& q, rutz::shared_ptr<SimEventSaveOutput>& e) 00109 { 00110 if (itsShowDebug.getVal()) 00111 { 00112 // get the OFS to save to, assuming sinfo is of type 00113 // SimModuleSaveInfo (will throw a fatal exception otherwise): 00114 nub::ref<FrameOstream> ofs = 00115 dynamic_cast<const SimModuleSaveInfo&>(e->sinfo()).ofs; 00116 Layout<PixRGB<byte> > disp = getDebugImage(); 00117 ofs->writeRgbLayout(disp, "SMap", FrameInfo("SMap", SRC_POS)); 00118 } 00119 } 00120 00121 00122 // ###################################################################### 00123 void SMap::onSimEventUserInput(SimEventQueue& q, rutz::shared_ptr<SimEventUserInput>& e) 00124 { 00125 00126 } 00127 00128 00129 // ###################################################################### 00130 void SMap::evolve() 00131 { 00132 //SHOWIMG(itsSMapCellsInput); 00133 00134 //CvMemStorage *storage2 = cvCreateMemStorage(1000); //TODO need to clear memory 00135 //CvSeq *comp; 00136 //int level = 1; 00137 //int threshold1 = 50; 00138 //int threshold2 = 20; 00139 //Image<PixRGB<byte> > dst(itsSMapCellsInput.getDims(), ZEROS); 00140 00141 //cvPyrSegmentation(img2ipl(itsSMapCellsInput), img2ipl(dst), 00142 // storage2, &comp, level, threshold1+1, threshold2+1); 00143 00144 ////SHOWIMG(dst); 00145 00146 00147 00148 //Image<byte> input = luminance(itsSMapCellsInput); 00149 ////SHOWIMG(input); 00150 //IplImage* img = img2ipl(input); 00151 //IplImage* marker_mask = cvCreateImage(cvGetSize(img), 8, 1); 00152 //cvCanny(img, marker_mask, 50, 100); 00153 00154 //Image<byte> edges = ipl2gray(marker_mask); 00155 ////SHOWIMG(edges); 00156 //CvMemStorage* storage = cvCreateMemStorage(0); 00157 00158 ////CvSeq* lines = cvHoughLines2(img2ipl(edges), storage, CV_HOUGH_STANDARD, 1, CV_PI/180, 50, 0, 0); 00159 //CvSeq* lines = cvHoughLines2(img2ipl(edges), storage, CV_HOUGH_PROBABILISTIC, 1, CV_PI/180, 250, 50, 10); 00160 00161 00162 //tmpEdges = edges; //Image<byte>(edges.getDims(), ZEROS); 00163 //Image<PixRGB<byte> > linesImg = edges; 00164 //for(int i=0; i<lines->total; i++) 00165 //{ 00166 // //float* line = (float*)cvGetSeqElem(lines,i); 00167 // //float rho = line[0]; 00168 // //float theta = line[1]; 00169 // //CvPoint pt1, pt2; 00170 // //double a = cos(theta), b = sin(theta); 00171 // //double x0 = a*rho, y0 = b*rho; 00172 // //pt1.x = cvRound(x0 + 1000*(-b)); 00173 // //pt1.y = cvRound(y0 + 1000*(a)); 00174 // //pt2.x = cvRound(x0 - 1000*(-b)); 00175 // //pt2.y = cvRound(y0 - 1000*(a)); 00176 00177 // CvPoint* line = (CvPoint*)cvGetSeqElem(lines,i); 00178 // CvPoint pt1 = line[0]; 00179 // CvPoint pt2 = line[1]; 00180 // 00181 // drawLine(linesImg, Point2D<int>(pt1.x,pt1.y), Point2D<int>(pt2.x, pt2.y), PixRGB<byte>(255,0,0)); 00182 //} 00183 00184 ////SHOWIMG(linesImg); 00185 // 00186 00187 //// LINFO("Calc vanishing points"); 00188 //// vanishingPoints(lines, cvSize(img->width, img->height), 2*(img->width), 2*(img->height), 00189 //// 50, 000); 00190 00191 00192 00193 //CvSeq* contours = 0; 00194 //cvFindContours( marker_mask, storage, &contours, sizeof(CvContour), 00195 // CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0) ); 00196 00197 //Image<PixRGB<byte> > contoursImg(edges.getDims(), ZEROS); 00198 //while( contours ) 00199 //{ 00200 // // approximate contour with accuracy proportional 00201 // // to the contour perimeter 00202 // CvSeq* result = 00203 // cvApproxPoly( contours, sizeof(CvContour), storage, 00204 // CV_POLY_APPROX_DP, cvContourPerimeter(contours)*0.02, 0 ); 00205 // // square contours should have 4 vertices after approximation 00206 // // relatively large area (to filter out noisy contours) 00207 // // and be convex. 00208 // // Note: absolute value of an area is used because 00209 // // area may be positive or negative - in accordance with the 00210 // // contour orientation 00211 // const double area = fabs(cvContourArea(result,CV_WHOLE_SEQ)); 00212 // if (result->total && 00213 // area >= 10) // && area <= 7000 && 00214 // //cvCheckContourConvexity(result)) 00215 // { 00216 // //double s = 0; 00217 00218 // //for (int i = 0; i < 4; ++i) 00219 // //{ 00220 // // // find minimum angle between joint 00221 // // // edges (maximum of cosine) 00222 // // const double t = 00223 // // fabs(angle((CvPoint*)cvGetSeqElem( result, i % 4 ), 00224 // // (CvPoint*)cvGetSeqElem( result, (i-2) % 4 ), 00225 // // (CvPoint*)cvGetSeqElem( result, (i-1) % 4 ))); 00226 // // s = s > t ? s : t; 00227 // //} 00228 00229 00230 // // if cosines of all angles are small 00231 // // (all angles are ~90 degree) then write quandrangle 00232 // // vertices to resultant sequence 00233 // //if (s < mincos) 00234 // { 00235 // CvPoint *p1 = (CvPoint*)cvGetSeqElem( result, result->total-1 ); 00236 // for (int i = 0; i < result->total; ++i) 00237 // { 00238 // CvPoint *p = (CvPoint*)cvGetSeqElem( result, i ); 00239 // drawLine(contoursImg, Point2D<int>(p1->x, p1->y), Point2D<int>(p->x, p->y), 00240 // PixRGB<byte>(255, 0, 0)); 00241 // p1 = p; 00242 00243 // //cvSeqPush(squares, 00244 // // (CvPoint*)cvGetSeqElem( result, i )); 00245 // } 00246 // // LINFO("area=%f, mincos=%f", area, s); 00247 // } 00248 // } 00249 00250 // // take the next contour 00251 // contours = contours->h_next; 00252 //} 00253 ////SHOWIMG(contoursImg); 00254 // 00255 // 00256 00257 ////CvMemStorage* storage = cvCreateMemStorage(0); 00258 ////CvSeq* contours = 0; 00259 ////int comp_count = 0; 00260 00261 00262 ////cvFindContours( marker_mask, storage, &contours, sizeof(CvContour), 00263 //// CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE ); 00264 ////IplImage* markers = cvCreateImage(cvGetSize(img), IPL_DEPTH_32F, 1); 00265 ////cvZero( markers ); 00266 ////for( ; contours != 0; contours = contours->h_next, comp_count++ ) 00267 ////{ 00268 //// cvDrawContours( markers, contours, cvScalarAll(comp_count+1), 00269 //// cvScalarAll(comp_count+1), -1, -1, 8, cvPoint(0,0) ); 00270 ////} 00271 ////SHOWIMG(ipl2float(markers)); 00272 00273 ////CvMat* color_tab = cvCreateMat( 1, comp_count, CV_8UC3 ); 00274 ////CvRNG rng = cvRNG(-1); 00275 ////for(int i = 0; i < comp_count; i++ ) 00276 ////{ 00277 //// uchar* ptr = color_tab->data.ptr + i*3; 00278 //// ptr[0] = (uchar)(cvRandInt(&rng)%180 + 50); 00279 //// ptr[1] = (uchar)(cvRandInt(&rng)%180 + 50); 00280 //// ptr[2] = (uchar)(cvRandInt(&rng)%180 + 50); 00281 ////} 00282 00283 ////{ 00284 //// double t = (double)cvGetTickCount(); 00285 //// cvWatershed( img2ipl(itsSMapCellsInput), markers ); 00286 //// t = (double)cvGetTickCount() - t; 00287 //// LINFO( "exec time = %gms", t/(cvGetTickFrequency()*1000.) ); 00288 ////} 00289 00290 ////// paint the watershed image 00291 ////IplImage* wshed = cvCloneImage(img2ipl(itsSMapCellsInput)); 00292 ////for(int i = 0; i < markers->height; i++ ) 00293 //// for(int j = 0; j < markers->width; j++ ) 00294 //// { 00295 //// int idx = CV_IMAGE_ELEM( markers, int, i, j ); 00296 //// uchar* dst = &CV_IMAGE_ELEM( wshed, uchar, i, j*3 ); 00297 //// if( idx == -1 ) 00298 //// dst[0] = dst[1] = dst[2] = (uchar)255; 00299 //// else if( idx <= 0 || idx > comp_count ) 00300 //// dst[0] = dst[1] = dst[2] = (uchar)0; // should not get here 00301 //// else 00302 //// { 00303 //// uchar* ptr = color_tab->data.ptr + (idx-1)*3; 00304 //// dst[0] = ptr[0]; dst[1] = ptr[1]; dst[2] = ptr[2]; 00305 //// } 00306 //// } 00307 00308 //////cvAddWeighted( wshed, 0.5, img_gray, 0.5, 0, wshed ); 00309 ////SHOWIMG(ipl2rgb(wshed)); 00310 00311 00312 00313 00314 itsEvc->input(itsSMapCellsInput); 00315 //itsEvc->input(dst); 00316 itsSMap = itsEvc->getVCXmap(); 00317 00318 //Image<float> lum = luminance(dst); 00319 Image<float> lum = luminance(itsSMapCellsInput); 00320 inplaceNormalize(lum, 0.0F, 255.0F); 00321 00322 SMapState background; 00323 std::vector<SMapState> foregroundObjs; 00324 00325 //Get the background/foreground region by looking at low saliency values 00326 Image<float> smap = rescale(itsSMap, itsSMapCellsInput.getDims()); 00327 for(int y=0; y<smap.getHeight(); y++) 00328 for(int x=0; x<smap.getWidth(); x++) 00329 { 00330 Point2D<int> pixLoc(x,y); 00331 float val = smap.getVal(pixLoc); 00332 00333 00334 //Check if we have a foreground or background by looking at saliency values 00335 //and calculate the mean and variance of the pixels in the background/foreground 00336 float pixVal = lum.getVal(x,y); 00337 if (val < 40) 00338 { 00339 background.region.push_back(pixLoc); 00340 00341 if (background.mu == -1) 00342 { 00343 background.mu = pixVal; 00344 background.sigma = 0; 00345 } 00346 else 00347 { 00348 //compute the stddev and mean of each feature 00349 //This algorithm is due to Knuth (The Art of Computer Programming, volume 2: 00350 // Seminumerical Algorithms, 3rd edn., p. 232. Boston: Addison-Wesley.) 00351 00352 //Mean 00353 const double delta = pixVal - background.mu; 00354 background.mu += delta/background.region.size(); 00355 00356 //variance 00357 if (background.region.size() > 2) //to avoid divide by 0 00358 { 00359 background.sigma = (background.sigma*(background.region.size()-2)) + delta*(pixVal-background.mu); 00360 background.sigma /= double(background.region.size()-1); 00361 } 00362 } 00363 00364 } else { 00365 00366 //find the foreground object closest to this pix in value and distance 00367 int objId = -1; 00368 double maxProb = 0.0; 00369 for(uint i=0; i<foregroundObjs.size(); i++) 00370 { 00371 double valProb = gauss<double>(pixVal, foregroundObjs[i].mu, foregroundObjs[i].sigma); 00372 //double distProb = gauss<double>(pixLoc.distance(foregroundObjs[i].center), 0, 00373 // foregroundObjs[i].sigma); 00374 00375 double prob = valProb; // + log(distProb); 00376 00377 if (prob > maxProb && prob > 1.0e-3) //get the max prob and the prob with values grater then min prob 00378 { 00379 maxProb = prob; 00380 objId = i; 00381 } 00382 } 00383 00384 if (objId == -1) 00385 { 00386 //New object add it 00387 SMapState objectRegion; 00388 objectRegion.mu = pixVal; 00389 objectRegion.sigma = 5; 00390 objectRegion.center = pixLoc; 00391 objectRegion.region.push_back(pixLoc); 00392 foregroundObjs.push_back(objectRegion); 00393 } else { 00394 //Add it to the exsisting object 00395 00396 foregroundObjs[objId].region.push_back(pixLoc); 00397 00398 //compute the stddev and mean of each feature 00399 //This algorithm is due to Knuth (The Art of Computer Programming, volume 2: 00400 // Seminumerical Algorithms, 3rd edn., p. 232. Boston: Addison-Wesley.) 00401 00402 00403 //Mean 00404 const double delta = pixVal - foregroundObjs[objId].mu; 00405 foregroundObjs[objId].mu += delta/foregroundObjs[objId].region.size(); 00406 00407 //variance 00408 if (foregroundObjs[objId].region.size() > 2) //to avoid divide by 0 00409 { 00410 foregroundObjs[objId].sigma = (foregroundObjs[objId].sigma*(foregroundObjs[objId].region.size()-2)) + delta*(pixVal-foregroundObjs[objId].mu); 00411 foregroundObjs[objId].sigma /= double(foregroundObjs[objId].region.size()-1); 00412 } 00413 } 00414 } 00415 } 00416 00417 //Add the objects to the map 00418 itsSMapState.clear(); 00419 00420 background.sigma = sqrt(background.sigma); 00421 itsSMapState.push_back(background); 00422 00423 for(uint i=0; i<foregroundObjs.size(); i++) 00424 { 00425 foregroundObjs[i].sigma = sqrt(foregroundObjs[i].sigma); 00426 itsSMapState.push_back(foregroundObjs[i]); 00427 } 00428 00429 } 00430 00431 00432 Layout<PixRGB<byte> > SMap::getDebugImage() 00433 { 00434 Layout<PixRGB<byte> > outDisp; 00435 00436 Image<PixRGB<byte> > in = itsSMapCellsInput; 00437 //inplaceNormalize(in, 0.0F, 255.0F); 00438 00439 Image<float> perc(in.getDims(), ZEROS); 00440 00441 for(uint i=0; i<itsSMapState.size(); i++) 00442 { 00443 SMapState rs = itsSMapState[i]; 00444 for(uint pix=0; pix<rs.region.size(); pix++) 00445 { 00446 //Could sample from the disterbusion and show these pixels 00447 perc.setVal(rs.region[pix], rs.mu); 00448 } 00449 } 00450 00451 Image<PixRGB<byte> > tmp = perc; 00452 // for(int y=0; y < tmpEdges.getHeight(); y++) 00453 // for(int x=0; x < tmpEdges.getWidth(); x++) 00454 // if (tmpEdges.getVal(x,y) > 0) 00455 // tmp.setVal(x,y,PixRGB<byte>(255,0,0)); 00456 // 00457 00458 Image<byte> smap = rescale(itsSMap, in.getDims()); 00459 inplaceNormalize(smap, (byte)0, (byte)255); 00460 00461 // outDisp = hcat(in, toRGB(smap)); 00462 outDisp = hcat(toRGB(smap), tmp); //toRGB(Image<byte>(perc))); 00463 00464 return outDisp; 00465 00466 } 00467 00468 // ###################################################################### 00469 /* So things look consistent in everyone's emacs... */ 00470 /* Local Variables: */ 00471 /* indent-tabs-mode: nil */ 00472 /* End: */ 00473 00474 #endif 00475