00001 /*! @file Beobot/app-build-salObjDB.C Build a database of salient VisualObject 00002 from a stream input */ 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Christian Siagian <siagian@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Beobot/app-build-salObjDB.C $ 00035 // $Id: app-build-salObjDB.C 13712 2010-07-28 21:00:40Z itti $ 00036 // 00037 00038 #include "Beobot/Landmark.H" 00039 #include "Channels/ChannelOpts.H" 00040 #include "Component/GlobalOpts.H" 00041 #include "Component/ModelManager.H" 00042 #include "Component/ModelOptionDef.H" 00043 #include "Component/OptionManager.H" 00044 #include "GUI/XWinManaged.H" 00045 #include "Gist/FFN.H" 00046 #include "Gist/trainUtils.H" 00047 #include "Image/ColorOps.H" 00048 #include "Image/CutPaste.H" 00049 #include "Image/DrawOps.H" 00050 #include "Image/MathOps.H" 00051 #include "Image/Pixels.H" 00052 #include "Image/ShapeOps.H" 00053 #include "Image/Transforms.H" 00054 #include "Media/MPEGStream.H" 00055 #include "Media/MediaOpts.H" 00056 #include "Media/MediaSimEvents.H" 00057 #include "Neuro/GistEstimator.H" 00058 #include "Neuro/InferoTemporal.H" 00059 #include "Neuro/NeuroOpts.H" 00060 #include "Neuro/NeuroSimEvents.H" 00061 #include "Neuro/Retina.H" 00062 #include "Neuro/ShapeEstimator.H" 00063 #include "Neuro/ShapeEstimatorModes.H" 00064 #include "Neuro/SpatialMetrics.H" 00065 #include "Neuro/StdBrain.H" 00066 #include "Neuro/gistParams.H" 00067 #include "Neuro/VisualCortex.H" 00068 #include "Raster/Raster.H" 00069 #include "SIFT/Histogram.H" 00070 #include "SIFT/Keypoint.H" 00071 #include "SIFT/VisualObject.H" 00072 #include "SIFT/VisualObjectDB.H" 00073 #include "Simulation/SimEventQueueConfigurator.H" 00074 #include "Util/Timer.H" 00075 00076 00077 #define DB_NAME "out_database" 00078 00079 #define W_ASPECT_RATIO 320 // ideal minimum width for display 00080 #define H_ASPECT_RATIO 240 // ideal minimum height for display 00081 00082 FeedForwardNetwork *ffn_place; 00083 double **gistW = NULL; 00084 00085 CloseButtonListener wList; 00086 XWinManaged *salWin; 00087 XWinManaged *gistWin; 00088 rutz::shared_ptr<XWinManaged> objWin; 00089 00090 int wDisp, hDisp, sDisp, scaleDisp; 00091 int wDispWin, hDispWin; 00092 00093 // gist display 00094 int pcaW = 16, pcaH = 5; 00095 int winBarW = 5, winBarH = 25; 00096 00097 // number of landmarks produced 00098 int numObj = 0; 00099 00100 // clip list 00101 uint nCat = 0; 00102 std::vector<std::string>* clipList; 00103 00104 // ###################################################################### 00105 void setupDispWin (int w, int h); 00106 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img, 00107 Image<float> gistImg, 00108 Image<float> gistPcaImg, 00109 Image<float> outHistImg); 00110 Image< PixRGB<byte> > getSalDispImg (Image< PixRGB<byte> > img, 00111 Image<float> roiImg, 00112 Image< PixRGB<byte> > objImg, 00113 Point2D<int> winner, int fNum); 00114 void processSalCue (Image<PixRGB<byte> > inputImg, 00115 nub::soft_ref<StdBrain> brain, 00116 Point2D<int> winner, int fNum, 00117 std::vector< rutz::shared_ptr<Landmark> >& 00118 landmarks, 00119 const Image<float>& semask, const std::string& selabel); 00120 void setupCases (const char* fname); 00121 // ###################################################################### 00122 00123 // Main function 00124 /*! Load a database, enrich it with new VisualObject entities 00125 extracted from the given images, and save it back. */ 00126 int main(const int argc, const char **argv) 00127 { 00128 MYLOGVERB = LOG_INFO; // suppress debug messages 00129 00130 // Instantiate a ModelManager: 00131 ModelManager manager("Salient objects DB Builder Model"); 00132 00133 // we cannot use saveResults() on our various ModelComponent objects 00134 // here, so let's not export the related command-line options. 00135 manager.allowOptions(OPTEXP_ALL & (~OPTEXP_SAVE)); 00136 00137 // Instantiate our various ModelComponents: 00138 nub::soft_ref<SimEventQueueConfigurator> 00139 seqc(new SimEventQueueConfigurator(manager)); 00140 manager.addSubComponent(seqc); 00141 00142 nub::soft_ref<InputMPEGStream> 00143 ims(new InputMPEGStream(manager, "Input MPEG Stream", "InputMPEGStream")); 00144 manager.addSubComponent(ims); 00145 00146 nub::soft_ref<StdBrain> brain(new StdBrain(manager)); 00147 manager.addSubComponent(brain); 00148 00149 nub::ref<SpatialMetrics> metrics(new SpatialMetrics(manager)); 00150 manager.addSubComponent(metrics); 00151 00152 manager.exportOptions(MC_RECURSE); 00153 metrics->setFOAradius(30); // FIXME 00154 metrics->setFoveaRadius(30); // FIXME 00155 manager.setOptionValString(&OPT_MaxNormType, "FancyOne"); 00156 manager.setOptionValString(&OPT_UseRandom, "false"); 00157 00158 manager.setOptionValString(&OPT_IORtype, "Disc"); 00159 manager.setOptionValString(&OPT_RawVisualCortexChans,"OIC"); 00160 00161 // customize the region considered part of the "object" 00162 // manager.setOptionValString("ShapeEstimatorMode","SaliencyMap"); 00163 // manager.setOptionValString(&OPT_ShapeEstimatorMode,"ConspicuityMap"); 00164 manager.setOptionValString(&OPT_ShapeEstimatorMode, "FeatureMap"); 00165 manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod, "Chamfer"); 00166 //manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod, "Gaussian"); 00167 00168 // set up the GIST ESTIMATOR 00169 //manager.setOptionValString(&OPT_GistEstimatorType,"Std"); 00170 00171 // DO NOT set up the INFEROTEMPORAL 00172 //manager.setOptionValString(&OPT_InferoTemporalType,"Std"); 00173 //manager.setOptionValString(&OPT_AttentionObjRecog,"yes"); 00174 //manager.setOptionValString(&OPT_MatchObjects,"false"); 00175 00176 // Request a bunch of option aliases (shortcuts to lists of options): 00177 REQUEST_OPTIONALIAS_NEURO(manager); 00178 00179 // Parse command-line: 00180 if (manager.parseCommandLine(argc, argv, "<input_gistList.txt>", 00181 1, 1) == false) 00182 return(1); 00183 00184 nub::soft_ref<SimEventQueue> seq = seqc->getQ(); 00185 00186 // NOTE: this could now be controlled by a command-line option 00187 // --preload-mpeg=true 00188 manager.setOptionValString(&OPT_InputMPEGStreamPreload, "true"); 00189 00190 setupCases(manager.getExtraArg(0).c_str()); 00191 00192 // frame delay in seconds 00193 double rtdelay = 33.3667/1000.0; // real time 00194 double fdelay = rtdelay * 3; // NOTE: 3 times slower than real time 00195 00196 Image< PixRGB<byte> > inputImg; 00197 Image< PixRGB<byte> > gistDispImg; 00198 int w = 0, h = 0; 00199 00200 SimTime prevstime = SimTime::ZERO(); uint fNum = 0; 00201 fNum = 0; 00202 00203 // let's get all our ModelComponent instances started: 00204 manager.start(); 00205 00206 // FIX: WE NEED TO START UTILIZING THIS 00207 // load the database: REPLACE BY LANDMARK for tracking purposes 00208 // rutz::shared_ptr<VisualObjectDB> vdb(new VisualObjectDB()); 00209 //if (vdb->loadFrom(DB_NAME)) 00210 // LINFO("Starting with empty VisualObjectDB."); 00211 00212 // SIFT visual object related 00213 std::vector< rutz::shared_ptr<Landmark> >** landmarks 00214 = new std::vector< rutz::shared_ptr<Landmark> >*[nCat]; 00215 00216 // for each category in the list 00217 int fTotal = 0; 00218 for(uint i = 0; i < nCat; i++) 00219 { 00220 landmarks[i] = new std::vector< rutz::shared_ptr<Landmark> > 00221 [clipList[i].size()]; 00222 // FIX: index is bigger than itsObject.size() 00223 // seems that a value is not reset when we are changing clips 00224 00225 // for each movie in that category 00226 for(uint j = 0; j < clipList[i].size(); j++) 00227 { 00228 // do post-command-line configs: 00229 ims->setFileName(clipList[i][j]); 00230 LINFO("Loading[%d][%d]: %s",i,j,clipList[i][j].c_str()); 00231 Raster::waitForKey(); 00232 00233 if(i ==0 && j == 0) 00234 { 00235 Dims iDims = ims->peekDims(); 00236 manager.setOptionValString(&OPT_InputFrameDims, 00237 convertToString(ims->peekDims())); 00238 w = iDims.w() - 50 + 1; h = iDims.h(); 00239 LINFO("w: %d, h: %d",w, h); 00240 00241 // setup display at the start of stream 00242 // NOTE: wDisp, hDisp, and sDisp are modified here 00243 setupDispWin(w, h); 00244 } 00245 00246 bool eoClip = false; 00247 fNum = 0; 00248 00249 // process until end of clip 00250 while(!eoClip) 00251 { 00252 // has the time come for a new frame? 00253 // If we want to SLOW THINGS DOWN change fdelay 00254 if (fNum == 0 || 00255 (seq->now() - 0.5 * (prevstime - seq->now())).secs() - fTotal * fdelay > fdelay) 00256 { 00257 // load new frame: // FIX THE SECOND CONDITION LATER 00258 inputImg = ims->readRGB(); 00259 if (inputImg.initialized() == false || (fNum == 5)) 00260 eoClip = true; // end of input stream 00261 else 00262 { 00263 // take out frame borders NOTE: ONLY FOR SONY CAMCORDER 00264 inputImg = crop(inputImg, Rectangle::tlbrI(0, 25, h-1, 25 + w - 1)); 00265 00266 // pass input to brain: 00267 LINFO("new frame Number: %d",fNum); 00268 rutz::shared_ptr<SimEventInputFrame> 00269 e(new SimEventInputFrame(brain.get(), GenericFrame(inputImg), 0)); 00270 seq->post(e); // post the image to the brain 00271 00272 // if we are tracking objects 00273 LINFO("Currently we have: %"ZU" objects in DB[%d][%d]", 00274 landmarks[i][j].size(),i,j); 00275 std::string imgName(sformat("image%07d", fNum)); 00276 00277 // FIX: is this redundant w/ IT 00278 rutz::shared_ptr<VisualObject> 00279 newVO(new VisualObject(imgName, "", inputImg)); 00280 for(uint k = 0; k < landmarks[i][j].size(); k++) 00281 { 00282 landmarks[i][j][k]->build(newVO, fNum); 00283 00284 // print the current location and velocity 00285 Point2D<int> pos = landmarks[i][j][k]->getPosition(); 00286 //Point2D<int> vel = landmarks[i][j][k]->getVelocity(); 00287 LINFO("landmark[%d][%d][%d]: %s is at %d,%d", i, j, k, 00288 landmarks[i][j][k]->getName().c_str(), pos.i, pos.j); 00289 // FIX NOTE: maybe need to put the position 00290 // (and thus the motion) in the name (for servoing) 00291 } 00292 00293 // increment frame count 00294 fNum++;fTotal++; 00295 } 00296 } 00297 00298 // evolve brain: 00299 prevstime = seq->now(); // time before current step 00300 const SimStatus status = seq->evolve(); 00301 00302 // process if SALIENT location is found 00303 if (SeC<SimEventWTAwinner> 00304 e = seq->check<SimEventWTAwinner>(0)) 00305 { 00306 // segment out salient location 00307 // check against the database 00308 const Point2D<int> winner = e->winner().p; 00309 //if(landmarks[i][j].size() == 0) // <------CHANGE THIS LATER 00310 00311 Image<float> semask; std::string selabel; 00312 if (SeC<SimEventShapeEstimatorOutput> 00313 e = seq->check<SimEventShapeEstimatorOutput>(0)) 00314 { semask = e->smoothMask(); selabel = e->winningLabel(); } 00315 00316 processSalCue(inputImg, brain, winner, fNum-1, landmarks[i][j], semask, selabel); 00317 } 00318 00319 if (SIM_BREAK == status) // Brain decided it's time to quit 00320 eoClip = true; 00321 00322 } // END while(!eoClip) 00323 00324 // display the current resulting database: 00325 LINFO("there are %" ZU " landmarks recovered in DB[%d][%d]", 00326 landmarks[i][j].size(),i,j); 00327 for(uint k = 0; k < landmarks[i][j].size(); k++) 00328 { 00329 LINFO(" %d: %s", k, landmarks[i][j][k]->getName().c_str()); 00330 rutz::shared_ptr<VisualObjectDB> voDB = 00331 landmarks[i][j][k]->getVisualObjectDB(); 00332 00333 // check the number of evidence for each landmark 00334 for(uint l = 0; l < voDB->numObjects(); l++) 00335 { 00336 LINFO(" %d: %s", l, voDB->getObject(l)->getName().c_str()); 00337 Image< PixRGB<byte> > tImg(2*w,2*h,ZEROS); 00338 inplacePaste(tImg, voDB->getObject(l)->getImage(), Point2D<int>(0, 0)); 00339 objWin->drawImage(tImg,0,0); 00340 Raster::waitForKey(); 00341 } 00342 } 00343 } 00344 00345 // we can now combine the salient objects across lighting condition 00346 // FIX: ADD 00347 00348 // take out moving things by discarding objects that are only exist in 1 clip. 00349 00350 // keep objects with a lot of salient hits 00351 00352 // order object with the starting frame number 00353 00354 // watch out for overlapping objects 00355 } 00356 00357 // save the resulting database: 00358 //if(vdb->numObjects() != 0) 00359 // vdb->saveTo(DB_NAME); 00360 00361 // stop all our ModelComponents 00362 manager.stop(); 00363 00364 // all done! 00365 return 0; 00366 } 00367 00368 // ###################################################################### 00369 // process salient cues 00370 void processSalCue(const Image<PixRGB<byte> > inputImg, 00371 nub::soft_ref<StdBrain> brain, Point2D<int> winner, int fNum, 00372 std::vector< rutz::shared_ptr<Landmark> >& landmarks, 00373 const Image<float>& semask, const std::string& selabel) 00374 { 00375 const int w = inputImg.getWidth(); 00376 const int h = inputImg.getHeight(); 00377 00378 // segment out the object -> maybe port to infero-temporal later 00379 // ---------------------------------------------- 00380 Image<float> roiImg; 00381 Image<PixRGB<byte> > objImg; Point2D<int> objOffset; 00382 00383 bool useSE = true; 00384 00385 // use Shape estimator to focus on the attended region when available 00386 if (semask.initialized()) 00387 { 00388 roiImg = semask * luminance(inputImg); 00389 float mn, mx; getMinMax(semask, mn, mx); 00390 Rectangle r = findBoundingRect(semask, mx*.05f); 00391 objImg = crop(inputImg, r); 00392 objOffset = Point2D<int>(r.left(),r.top()); 00393 00394 // and size is not too big (below 50% input image) 00395 int wSE = objImg.getWidth(), hSE = objImg.getHeight(); 00396 if(wSE * hSE > .5 * w * h) 00397 { 00398 LINFO("SE Smooth Mask is too big: %d > %d", wSE*hSE, int(.5*w*h)); 00399 useSE = false; 00400 } 00401 else 00402 LINFO("SE Smooth Mask is used %d <= %d", wSE*hSE, int(.5*w*h)); 00403 } 00404 else 00405 { 00406 roiImg = luminance(inputImg); 00407 objImg = inputImg; 00408 objOffset = Point2D<int>(0,0); 00409 useSE = false; 00410 LINFO("SE Smooth Mask not yet initialized"); 00411 } 00412 00413 // otherwise use pre-set 100x100window 00414 if(!useSE) 00415 { 00416 Rectangle roi = 00417 Rectangle::tlbrI(winner.j - 50, winner.i - 50, 00418 winner.j + 50, winner.i + 50); 00419 roi = roi.getOverlap(inputImg.getBounds()); 00420 00421 // keep the roiImg 00422 objImg = crop(inputImg, roi); 00423 objOffset = Point2D<int>(roi.left(),roi.top()); 00424 00425 LINFO("SE not ready"); 00426 Raster::waitForKey(); 00427 } 00428 00429 LINFO("TOP LEFT at: (%d,%d)", objOffset.i, objOffset.j); 00430 00431 // draw the results 00432 salWin->drawImage(getSalDispImg(inputImg,roiImg,objImg, winner, fNum),0,0); 00433 LINFO("Frame: %d, winner: (%d,%d) in %s", fNum, winner.i, winner.j, 00434 selabel.c_str()); 00435 if(fNum > 50) 00436 Raster::waitForKey(); 00437 00438 // need a Visual Cortex to obtain the feature vector 00439 LFATAL("fixme using a SimReq"); 00440 ////////nub::soft_ref<VisualCortex> vc = brain->getVC(); 00441 std::vector<float> fvec; /////////vc->getFeatures(winner, fvec); 00442 00443 // create a new VisualObject (a set of SIFT keypoints) 00444 // with the top-left coordinate of the window 00445 rutz::shared_ptr<VisualObject> 00446 obj(new VisualObject("NewObject", "NewObject", objImg, 00447 winner - objOffset, fvec)); 00448 00449 std::string objName(sformat("obj%07d", numObj)); 00450 obj->setName(objName); 00451 obj->setImageFname(objName + ".png"); 00452 numObj++; 00453 00454 // check with the salient regions DB before adding 00455 int trackAccepted = 0; 00456 LINFO("we have: %"ZU" landmarks to match", landmarks.size()); 00457 for(uint i = 0; i < landmarks.size(); i++) 00458 { 00459 LINFO("tracking landmark number: %d",i); 00460 rutz::shared_ptr<VisualObjectMatch> cmatch = 00461 landmarks[i]->build(obj, objOffset, fNum); 00462 if(cmatch.is_valid() && cmatch->getScore() > 3.0) 00463 trackAccepted++; 00464 } 00465 00466 // if it's not used by any of the existing landmarks entry 00467 if(trackAccepted == 0) 00468 { 00469 // create a new one 00470 LINFO("create a new Landmark number %"ZU,landmarks.size()); 00471 std::string lmName(sformat("landmark%07"ZU, landmarks.size())); 00472 rutz::shared_ptr<Landmark> newlm(new Landmark(obj, objOffset, fNum, lmName)); 00473 newlm->setMatchWin(objWin); 00474 landmarks.push_back(newlm); 00475 if(fNum > 50) 00476 Raster::waitForKey(); 00477 } 00478 else if(trackAccepted > 1) 00479 { 00480 LINFO("May have: %d objects jumbled together", trackAccepted); 00481 } 00482 } 00483 00484 // ###################################################################### 00485 // setup display window for visualization purposes 00486 void setupDispWin(int w, int h) 00487 { 00488 00489 //==================================================================== 00490 /* 00491 // figure out the best display w, h, and scale for gist 00492 00493 // check if both dimensions of the image 00494 // are much smaller than the desired resolution 00495 scaleDisp = 1; 00496 while (w*scaleDisp < W_ASPECT_RATIO*.75 && h*scaleDisp < H_ASPECT_RATIO*.75) 00497 scaleDisp++; 00498 00499 // check if the height is longer aspect-ratio-wise 00500 // this is because the whole display is setup wrt/ to it 00501 wDisp = w*scaleDisp; hDisp = h*scaleDisp; 00502 if(wDisp/(0.0 + W_ASPECT_RATIO) > hDisp/(0.0 + H_ASPECT_RATIO)) 00503 hDisp = (int)(wDisp / (0.0 + W_ASPECT_RATIO) * H_ASPECT_RATIO)+1; 00504 else 00505 wDisp = (int)(hDisp / (0.0 + H_ASPECT_RATIO) * W_ASPECT_RATIO)+1; 00506 00507 // add slack so that the gist feature entry is square 00508 sDisp = (hDisp/NUM_GIST_FEAT + 1); 00509 hDisp = sDisp * NUM_GIST_FEAT; 00510 00511 // add space for all the visuals 00512 wDispWin = wDisp + sDisp * NUM_GIST_COL; 00513 hDispWin = hDisp + sDisp * pcaH * 2; 00514 00515 gistWin = new XWinManaged(Dims(wDispWin, hDispWin), 0, 0, "Gist Related"); 00516 wList.add(gistWin); 00517 */ 00518 //==================================================================== 00519 00520 salWin = new XWinManaged(Dims(2*w, 2*h), 2*w, 0, "Saliency Related" ); 00521 wList.add(salWin); 00522 00523 objWin.reset(new XWinManaged(Dims(2*w, 2*h), 0, 0, "Object Match" )); 00524 wList.add(*objWin); 00525 00526 } 00527 00528 // ###################################################################### 00529 // open the *_gistList.txt file containing all the list of .mpg files 00530 void setupCases(const char* fname) 00531 { 00532 char comment[200]; char folder[200]; 00533 FILE *fp; char inLine[100]; 00534 00535 // get the folder, 47 is a slash '/' 00536 const char* tp = strrchr(fname,47); 00537 strncpy(folder,fname,tp-fname+1); folder[tp-fname+1] = '\0'; 00538 LINFO("Folder %s -> %s", fname, folder); 00539 00540 // open a file that lists the sample with ground truth 00541 if((fp = fopen(fname,"rb")) == NULL) 00542 LFATAL("gistList file: %s not found",fname); 00543 00544 // skip number of samples 00545 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed"); 00546 00547 // get the number of categories 00548 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed"); sscanf(inLine, "%d %s", &nCat, comment); 00549 clipList = new std::vector<std::string>[nCat]; 00550 00551 // skip the type of ground truth and column headers 00552 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed"); 00553 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed"); 00554 00555 char fileName[200]; 00556 char cName[100]; char sName[100]; char ext[100]; 00557 int cStart, cNum; int gTruth; 00558 00559 while(fgets(inLine, 1000, fp) != NULL) 00560 { 00561 // get the files in this category and ground truth 00562 sscanf(inLine, "%s %d %d %d %s", cName, &cStart, &cNum, &gTruth, ext); 00563 char* cname = strrchr(cName,95); // 95 is underscore '_' 00564 strncpy(sName,cName,cname-cName); sName[cname-cName] = '\0'; 00565 sprintf(fileName,"%s%s.mpg", folder,sName); 00566 clipList[gTruth].push_back(fileName); 00567 //LINFO(" sName: %s -:- %d", fileName, gTruth); 00568 } 00569 00570 // //for display 00571 // for(uint i = 0; i < nCat; i++) 00572 // { 00573 // for(uint j = 0; j < clipList[i].size(); j++) 00574 // { 00575 // LINFO("%d %d: %s",i,j,clipList[i][j].c_str()); 00576 // } 00577 // LINFO(" "); 00578 // } 00579 00580 fclose(fp); 00581 } 00582 00583 // ###################################################################### 00584 // get saliency display image for visualization purposes 00585 Image< PixRGB<byte> > getSalDispImg (Image< PixRGB<byte> > img, 00586 Image<float> roiImg, 00587 Image< PixRGB<byte> > objImg, 00588 Point2D<int> winner, 00589 int fNum) 00590 { 00591 int w = img.getWidth(), h = img.getHeight(); 00592 Image< PixRGB<byte> > salDispImg(2*w,2*h,ZEROS); 00593 00594 inplacePaste(salDispImg, img, Point2D<int>(0, 0)); 00595 Image<float> rRoiImg = roiImg; 00596 float min,max; 00597 getMinMax(roiImg,min,max); 00598 drawCircle( roiImg, winner, 10, 0.0f, 1); 00599 drawPoint ( roiImg, winner.i, winner.j, 0.0f); 00600 drawCircle(rRoiImg, winner, 10, 255.0f, 1); 00601 drawPoint (rRoiImg, winner.i, winner.j, 255.0f); 00602 Image< PixRGB<byte> > t = makeRGB(rRoiImg,roiImg,roiImg); 00603 inplacePaste(salDispImg, t, Point2D<int>(0, h)); 00604 inplacePaste(salDispImg, objImg, Point2D<int>(w, h)); 00605 00606 writeText(salDispImg, Point2D<int>(w,0), sformat("%d",fNum).c_str(), 00607 PixRGB<byte>(0,0,0), PixRGB<byte>(255,255,255)); 00608 return salDispImg; 00609 } 00610 00611 // ###################################################################### 00612 // get gist display image for visualization purposes 00613 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img, 00614 Image<float> gistImg, 00615 Image<float> gistPcaImg, 00616 Image<float> outHistImg) 00617 { 00618 Image< PixRGB<byte> > gistDispImg(wDispWin, hDispWin, ZEROS); 00619 int w = img.getWidth(); int h = img.getHeight(); 00620 00621 // grid the displayed input image 00622 drawGrid(img, w/4,h/4,1,1,PixRGB<byte>(255,255,255)); 00623 inplacePaste(gistDispImg, img, Point2D<int>(0, 0)); 00624 00625 // display the gist features 00626 inplaceNormalize(gistImg, 0.0f, 255.0f); 00627 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistImg), Point2D<int>(wDisp, 0)); 00628 00629 // display the PCA gist features 00630 inplaceNormalize(gistPcaImg, 0.0f, 255.0f); 00631 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistPcaImg), Point2D<int>(wDisp, hDisp)); 00632 00633 // display the classifier output histogram 00634 inplaceNormalize(outHistImg, 0.0f, 255.0f); 00635 inplacePaste(gistDispImg, Image<PixRGB<byte> >(outHistImg), Point2D<int>(0, hDisp)); 00636 00637 // draw lines delineating the information 00638 drawLine(gistDispImg, Point2D<int>(0,hDisp), 00639 Point2D<int>(wDispWin,hDisp), 00640 PixRGB<byte>(255,255,255),1); 00641 drawLine(gistDispImg, Point2D<int>(wDisp-1,0), 00642 Point2D<int>(wDisp-1,hDispWin-1), 00643 PixRGB<byte>(255,255,255),1); 00644 return gistDispImg; 00645 } 00646 00647 // ###################################################################### 00648 /* So things look consistent in everyone's emacs... */ 00649 /* Local Variables: */ 00650 /* indent-tabs-mode: nil */ 00651 /* End: */