00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "Beobot/Landmark.H"
00039 #include "Channels/ChannelOpts.H"
00040 #include "Component/GlobalOpts.H"
00041 #include "Component/ModelManager.H"
00042 #include "Component/ModelOptionDef.H"
00043 #include "Component/OptionManager.H"
00044 #include "GUI/XWinManaged.H"
00045 #include "Gist/FFN.H"
00046 #include "Gist/trainUtils.H"
00047 #include "Image/ColorOps.H"
00048 #include "Image/CutPaste.H"
00049 #include "Image/DrawOps.H"
00050 #include "Image/MathOps.H"
00051 #include "Image/Pixels.H"
00052 #include "Image/ShapeOps.H"
00053 #include "Image/Transforms.H"
00054 #include "Media/MPEGStream.H"
00055 #include "Media/MediaOpts.H"
00056 #include "Media/MediaSimEvents.H"
00057 #include "Neuro/GistEstimator.H"
00058 #include "Neuro/InferoTemporal.H"
00059 #include "Neuro/NeuroOpts.H"
00060 #include "Neuro/NeuroSimEvents.H"
00061 #include "Neuro/Retina.H"
00062 #include "Neuro/ShapeEstimator.H"
00063 #include "Neuro/ShapeEstimatorModes.H"
00064 #include "Neuro/SpatialMetrics.H"
00065 #include "Neuro/StdBrain.H"
00066 #include "Neuro/gistParams.H"
00067 #include "Neuro/VisualCortex.H"
00068 #include "Raster/Raster.H"
00069 #include "SIFT/Histogram.H"
00070 #include "SIFT/Keypoint.H"
00071 #include "SIFT/VisualObject.H"
00072 #include "SIFT/VisualObjectDB.H"
00073 #include "Simulation/SimEventQueueConfigurator.H"
00074 #include "Util/Timer.H"
00075
00076
// DB_NAME is not referenced anywhere in the visible part of this file.
00077 #define DB_NAME "out_database"
00078
00079 #define W_ASPECT_RATIO 320 // ideal minimum width for display
00080 #define H_ASPECT_RATIO 240 // ideal minimum height for display
00081
// ffn_place and gistW are declared here but not used by any function
// visible in this file (presumably left over from a gist classifier).
00082 FeedForwardNetwork *ffn_place;
00083 double **gistW = NULL;
00084
// Display windows. salWin and objWin are created in setupDispWin() and
// registered with wList; gistWin is never assigned in the visible code.
00085 CloseButtonListener wList;
00086 XWinManaged *salWin;
00087 XWinManaged *gistWin;
00088 rutz::shared_ptr<XWinManaged> objWin;
00089
// Layout values read by getGistDispImg(): wDispWin x hDispWin is the size of
// the composed image and (wDisp, hDisp) are the paste offsets of its panels.
// None of these are assigned in the visible code, so they keep their
// zero-initialized values unless set elsewhere. sDisp and scaleDisp are unused.
00090 int wDisp, hDisp, sDisp, scaleDisp;
00091 int wDispWin, hDispWin;
00092
00093
// Not referenced in the visible code.
00094 int pcaW = 16, pcaH = 5;
00095 int winBarW = 5, winBarH = 25;
00096
00097
// Running count of objects created by processSalCue(); used to build the
// "obj%07d" object names.
00098 int numObj = 0;
00099
00100
// Filled in by setupCases(): nCat is the number of categories read from the
// list file and clipList is an array of nCat vectors holding the clip file
// names of each category.
00101 uint nCat = 0;
00102 std::vector<std::string>* clipList;
00103
00104
// Forward declarations of the helpers defined after main().

// Create the "Saliency Related" and "Object Match" windows, each 2w x 2h.
00105 void setupDispWin (int w, int h);
// Compose the gist display image from the input frame and three float maps.
00106 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img,
00107 Image<float> gistImg,
00108 Image<float> gistPcaImg,
00109 Image<float> outHistImg);
// Compose the saliency display image (frame, marked region map, object crop).
00110 Image< PixRGB<byte> > getSalDispImg (Image< PixRGB<byte> > img,
00111 Image<float> roiImg,
00112 Image< PixRGB<byte> > objImg,
00113 Point2D<int> winner, int fNum);
// Handle one attention winner: extract the object around it, display it and
// match it against (or add it to) the given list of landmarks.
00114 void processSalCue (Image<PixRGB<byte> > inputImg,
00115 nub::soft_ref<StdBrain> brain,
00116 Point2D<int> winner, int fNum,
00117 std::vector< rutz::shared_ptr<Landmark> >&
00118 landmarks,
00119 const Image<float>& semask, const std::string& selabel);
// Read the list file and fill the globals nCat and clipList.
00120 void setupCases (const char* fname);
00121
00122
00123
00124
00125
00126 int main(const int argc, const char **argv)
00127 {
00128 MYLOGVERB = LOG_INFO;
00129
00130
00131 ModelManager manager("Salient objects DB Builder Model");
00132
00133
00134
00135 manager.allowOptions(OPTEXP_ALL & (~OPTEXP_SAVE));
00136
00137
00138 nub::soft_ref<SimEventQueueConfigurator>
00139 seqc(new SimEventQueueConfigurator(manager));
00140 manager.addSubComponent(seqc);
00141
00142 nub::soft_ref<InputMPEGStream>
00143 ims(new InputMPEGStream(manager, "Input MPEG Stream", "InputMPEGStream"));
00144 manager.addSubComponent(ims);
00145
00146 nub::soft_ref<StdBrain> brain(new StdBrain(manager));
00147 manager.addSubComponent(brain);
00148
00149 nub::ref<SpatialMetrics> metrics(new SpatialMetrics(manager));
00150 manager.addSubComponent(metrics);
00151
00152 manager.exportOptions(MC_RECURSE);
00153 metrics->setFOAradius(30);
00154 metrics->setFoveaRadius(30);
00155 manager.setOptionValString(&OPT_MaxNormType, "FancyOne");
00156 manager.setOptionValString(&OPT_UseRandom, "false");
00157
00158 manager.setOptionValString(&OPT_IORtype, "Disc");
00159 manager.setOptionValString(&OPT_RawVisualCortexChans,"OIC");
00160
00161
00162
00163
00164 manager.setOptionValString(&OPT_ShapeEstimatorMode, "FeatureMap");
00165 manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod, "Chamfer");
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177 REQUEST_OPTIONALIAS_NEURO(manager);
00178
00179
00180 if (manager.parseCommandLine(argc, argv, "<input_gistList.txt>",
00181 1, 1) == false)
00182 return(1);
00183
00184 nub::soft_ref<SimEventQueue> seq = seqc->getQ();
00185
00186
00187
00188 manager.setOptionValString(&OPT_InputMPEGStreamPreload, "true");
00189
00190 setupCases(manager.getExtraArg(0).c_str());
00191
00192
00193 double rtdelay = 33.3667/1000.0;
00194 double fdelay = rtdelay * 3;
00195
00196 Image< PixRGB<byte> > inputImg;
00197 Image< PixRGB<byte> > gistDispImg;
00198 int w = 0, h = 0;
00199
00200 SimTime prevstime = SimTime::ZERO(); uint fNum = 0;
00201 fNum = 0;
00202
00203
00204 manager.start();
00205
00206
00207
00208
00209
00210
00211
00212
00213 std::vector< rutz::shared_ptr<Landmark> >** landmarks
00214 = new std::vector< rutz::shared_ptr<Landmark> >*[nCat];
00215
00216
00217 int fTotal = 0;
00218 for(uint i = 0; i < nCat; i++)
00219 {
00220 landmarks[i] = new std::vector< rutz::shared_ptr<Landmark> >
00221 [clipList[i].size()];
00222
00223
00224
00225
00226 for(uint j = 0; j < clipList[i].size(); j++)
00227 {
00228
00229 ims->setFileName(clipList[i][j]);
00230 LINFO("Loading[%d][%d]: %s",i,j,clipList[i][j].c_str());
00231 Raster::waitForKey();
00232
00233 if(i ==0 && j == 0)
00234 {
00235 Dims iDims = ims->peekDims();
00236 manager.setOptionValString(&OPT_InputFrameDims,
00237 convertToString(ims->peekDims()));
00238 w = iDims.w() - 50 + 1; h = iDims.h();
00239 LINFO("w: %d, h: %d",w, h);
00240
00241
00242
00243 setupDispWin(w, h);
00244 }
00245
00246 bool eoClip = false;
00247 fNum = 0;
00248
00249
00250 while(!eoClip)
00251 {
00252
00253
00254 if (fNum == 0 ||
00255 (seq->now() - 0.5 * (prevstime - seq->now())).secs() - fTotal * fdelay > fdelay)
00256 {
00257
00258 inputImg = ims->readRGB();
00259 if (inputImg.initialized() == false || (fNum == 5))
00260 eoClip = true;
00261 else
00262 {
00263
00264 inputImg = crop(inputImg, Rectangle::tlbrI(0, 25, h-1, 25 + w - 1));
00265
00266
00267 LINFO("new frame Number: %d",fNum);
00268 rutz::shared_ptr<SimEventInputFrame>
00269 e(new SimEventInputFrame(brain.get(), GenericFrame(inputImg), 0));
00270 seq->post(e);
00271
00272
00273 LINFO("Currently we have: %"ZU" objects in DB[%d][%d]",
00274 landmarks[i][j].size(),i,j);
00275 std::string imgName(sformat("image%07d", fNum));
00276
00277
00278 rutz::shared_ptr<VisualObject>
00279 newVO(new VisualObject(imgName, "", inputImg));
00280 for(uint k = 0; k < landmarks[i][j].size(); k++)
00281 {
00282 landmarks[i][j][k]->build(newVO, fNum);
00283
00284
00285 Point2D<int> pos = landmarks[i][j][k]->getPosition();
00286
00287 LINFO("landmark[%d][%d][%d]: %s is at %d,%d", i, j, k,
00288 landmarks[i][j][k]->getName().c_str(), pos.i, pos.j);
00289
00290
00291 }
00292
00293
00294 fNum++;fTotal++;
00295 }
00296 }
00297
00298
00299 prevstime = seq->now();
00300 const SimStatus status = seq->evolve();
00301
00302
00303 if (SeC<SimEventWTAwinner>
00304 e = seq->check<SimEventWTAwinner>(0))
00305 {
00306
00307
00308 const Point2D<int> winner = e->winner().p;
00309
00310
00311 Image<float> semask; std::string selabel;
00312 if (SeC<SimEventShapeEstimatorOutput>
00313 e = seq->check<SimEventShapeEstimatorOutput>(0))
00314 { semask = e->smoothMask(); selabel = e->winningLabel(); }
00315
00316 processSalCue(inputImg, brain, winner, fNum-1, landmarks[i][j], semask, selabel);
00317 }
00318
00319 if (SIM_BREAK == status)
00320 eoClip = true;
00321
00322 }
00323
00324
00325 LINFO("there are %" ZU " landmarks recovered in DB[%d][%d]",
00326 landmarks[i][j].size(),i,j);
00327 for(uint k = 0; k < landmarks[i][j].size(); k++)
00328 {
00329 LINFO(" %d: %s", k, landmarks[i][j][k]->getName().c_str());
00330 rutz::shared_ptr<VisualObjectDB> voDB =
00331 landmarks[i][j][k]->getVisualObjectDB();
00332
00333
00334 for(uint l = 0; l < voDB->numObjects(); l++)
00335 {
00336 LINFO(" %d: %s", l, voDB->getObject(l)->getName().c_str());
00337 Image< PixRGB<byte> > tImg(2*w,2*h,ZEROS);
00338 inplacePaste(tImg, voDB->getObject(l)->getImage(), Point2D<int>(0, 0));
00339 objWin->drawImage(tImg,0,0);
00340 Raster::waitForKey();
00341 }
00342 }
00343 }
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355 }
00356
00357
00358
00359
00360
00361
00362 manager.stop();
00363
00364
00365 return 0;
00366 }
00367
00368
00369
00370 void processSalCue(const Image<PixRGB<byte> > inputImg,
00371 nub::soft_ref<StdBrain> brain, Point2D<int> winner, int fNum,
00372 std::vector< rutz::shared_ptr<Landmark> >& landmarks,
00373 const Image<float>& semask, const std::string& selabel)
00374 {
00375 const int w = inputImg.getWidth();
00376 const int h = inputImg.getHeight();
00377
00378
00379
00380 Image<float> roiImg;
00381 Image<PixRGB<byte> > objImg; Point2D<int> objOffset;
00382
00383 bool useSE = true;
00384
00385
00386 if (semask.initialized())
00387 {
00388 roiImg = semask * luminance(inputImg);
00389 float mn, mx; getMinMax(semask, mn, mx);
00390 Rectangle r = findBoundingRect(semask, mx*.05f);
00391 objImg = crop(inputImg, r);
00392 objOffset = Point2D<int>(r.left(),r.top());
00393
00394
00395 int wSE = objImg.getWidth(), hSE = objImg.getHeight();
00396 if(wSE * hSE > .5 * w * h)
00397 {
00398 LINFO("SE Smooth Mask is too big: %d > %d", wSE*hSE, int(.5*w*h));
00399 useSE = false;
00400 }
00401 else
00402 LINFO("SE Smooth Mask is used %d <= %d", wSE*hSE, int(.5*w*h));
00403 }
00404 else
00405 {
00406 roiImg = luminance(inputImg);
00407 objImg = inputImg;
00408 objOffset = Point2D<int>(0,0);
00409 useSE = false;
00410 LINFO("SE Smooth Mask not yet initialized");
00411 }
00412
00413
00414 if(!useSE)
00415 {
00416 Rectangle roi =
00417 Rectangle::tlbrI(winner.j - 50, winner.i - 50,
00418 winner.j + 50, winner.i + 50);
00419 roi = roi.getOverlap(inputImg.getBounds());
00420
00421
00422 objImg = crop(inputImg, roi);
00423 objOffset = Point2D<int>(roi.left(),roi.top());
00424
00425 LINFO("SE not ready");
00426 Raster::waitForKey();
00427 }
00428
00429 LINFO("TOP LEFT at: (%d,%d)", objOffset.i, objOffset.j);
00430
00431
00432 salWin->drawImage(getSalDispImg(inputImg,roiImg,objImg, winner, fNum),0,0);
00433 LINFO("Frame: %d, winner: (%d,%d) in %s", fNum, winner.i, winner.j,
00434 selabel.c_str());
00435 if(fNum > 50)
00436 Raster::waitForKey();
00437
00438
00439 LFATAL("fixme using a SimReq");
00440
00441 std::vector<float> fvec;
00442
00443
00444
00445 rutz::shared_ptr<VisualObject>
00446 obj(new VisualObject("NewObject", "NewObject", objImg,
00447 winner - objOffset, fvec));
00448
00449 std::string objName(sformat("obj%07d", numObj));
00450 obj->setName(objName);
00451 obj->setImageFname(objName + ".png");
00452 numObj++;
00453
00454
00455 int trackAccepted = 0;
00456 LINFO("we have: %"ZU" landmarks to match", landmarks.size());
00457 for(uint i = 0; i < landmarks.size(); i++)
00458 {
00459 LINFO("tracking landmark number: %d",i);
00460 rutz::shared_ptr<VisualObjectMatch> cmatch =
00461 landmarks[i]->build(obj, objOffset, fNum);
00462 if(cmatch.is_valid() && cmatch->getScore() > 3.0)
00463 trackAccepted++;
00464 }
00465
00466
00467 if(trackAccepted == 0)
00468 {
00469
00470 LINFO("create a new Landmark number %"ZU,landmarks.size());
00471 std::string lmName(sformat("landmark%07"ZU, landmarks.size()));
00472 rutz::shared_ptr<Landmark> newlm(new Landmark(obj, objOffset, fNum, lmName));
00473 newlm->setMatchWin(objWin);
00474 landmarks.push_back(newlm);
00475 if(fNum > 50)
00476 Raster::waitForKey();
00477 }
00478 else if(trackAccepted > 1)
00479 {
00480 LINFO("May have: %d objects jumbled together", trackAccepted);
00481 }
00482 }
00483
00484
00485
00486 void setupDispWin(int w, int h)
00487 {
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520 salWin = new XWinManaged(Dims(2*w, 2*h), 2*w, 0, "Saliency Related" );
00521 wList.add(salWin);
00522
00523 objWin.reset(new XWinManaged(Dims(2*w, 2*h), 0, 0, "Object Match" ));
00524 wList.add(*objWin);
00525
00526 }
00527
00528
00529
00530 void setupCases(const char* fname)
00531 {
00532 char comment[200]; char folder[200];
00533 FILE *fp; char inLine[100];
00534
00535
00536 const char* tp = strrchr(fname,47);
00537 strncpy(folder,fname,tp-fname+1); folder[tp-fname+1] = '\0';
00538 LINFO("Folder %s -> %s", fname, folder);
00539
00540
00541 if((fp = fopen(fname,"rb")) == NULL)
00542 LFATAL("gistList file: %s not found",fname);
00543
00544
00545 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed");
00546
00547
00548 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed"); sscanf(inLine, "%d %s", &nCat, comment);
00549 clipList = new std::vector<std::string>[nCat];
00550
00551
00552 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed");
00553 if (fgets(inLine, 1000, fp) == NULL) LFATAL("fgets failed");
00554
00555 char fileName[200];
00556 char cName[100]; char sName[100]; char ext[100];
00557 int cStart, cNum; int gTruth;
00558
00559 while(fgets(inLine, 1000, fp) != NULL)
00560 {
00561
00562 sscanf(inLine, "%s %d %d %d %s", cName, &cStart, &cNum, &gTruth, ext);
00563 char* cname = strrchr(cName,95);
00564 strncpy(sName,cName,cname-cName); sName[cname-cName] = '\0';
00565 sprintf(fileName,"%s%s.mpg", folder,sName);
00566 clipList[gTruth].push_back(fileName);
00567
00568 }
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580 fclose(fp);
00581 }
00582
00583
00584
00585 Image< PixRGB<byte> > getSalDispImg (Image< PixRGB<byte> > img,
00586 Image<float> roiImg,
00587 Image< PixRGB<byte> > objImg,
00588 Point2D<int> winner,
00589 int fNum)
00590 {
00591 int w = img.getWidth(), h = img.getHeight();
00592 Image< PixRGB<byte> > salDispImg(2*w,2*h,ZEROS);
00593
00594 inplacePaste(salDispImg, img, Point2D<int>(0, 0));
00595 Image<float> rRoiImg = roiImg;
00596 float min,max;
00597 getMinMax(roiImg,min,max);
00598 drawCircle( roiImg, winner, 10, 0.0f, 1);
00599 drawPoint ( roiImg, winner.i, winner.j, 0.0f);
00600 drawCircle(rRoiImg, winner, 10, 255.0f, 1);
00601 drawPoint (rRoiImg, winner.i, winner.j, 255.0f);
00602 Image< PixRGB<byte> > t = makeRGB(rRoiImg,roiImg,roiImg);
00603 inplacePaste(salDispImg, t, Point2D<int>(0, h));
00604 inplacePaste(salDispImg, objImg, Point2D<int>(w, h));
00605
00606 writeText(salDispImg, Point2D<int>(w,0), sformat("%d",fNum).c_str(),
00607 PixRGB<byte>(0,0,0), PixRGB<byte>(255,255,255));
00608 return salDispImg;
00609 }
00610
00611
00612
00613 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img,
00614 Image<float> gistImg,
00615 Image<float> gistPcaImg,
00616 Image<float> outHistImg)
00617 {
00618 Image< PixRGB<byte> > gistDispImg(wDispWin, hDispWin, ZEROS);
00619 int w = img.getWidth(); int h = img.getHeight();
00620
00621
00622 drawGrid(img, w/4,h/4,1,1,PixRGB<byte>(255,255,255));
00623 inplacePaste(gistDispImg, img, Point2D<int>(0, 0));
00624
00625
00626 inplaceNormalize(gistImg, 0.0f, 255.0f);
00627 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistImg), Point2D<int>(wDisp, 0));
00628
00629
00630 inplaceNormalize(gistPcaImg, 0.0f, 255.0f);
00631 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistPcaImg), Point2D<int>(wDisp, hDisp));
00632
00633
00634 inplaceNormalize(outHistImg, 0.0f, 255.0f);
00635 inplacePaste(gistDispImg, Image<PixRGB<byte> >(outHistImg), Point2D<int>(0, hDisp));
00636
00637
00638 drawLine(gistDispImg, Point2D<int>(0,hDisp),
00639 Point2D<int>(wDispWin,hDisp),
00640 PixRGB<byte>(255,255,255),1);
00641 drawLine(gistDispImg, Point2D<int>(wDisp-1,0),
00642 Point2D<int>(wDisp-1,hDispWin-1),
00643 PixRGB<byte>(255,255,255),1);
00644 return gistDispImg;
00645 }
00646
00647
00648
00649
00650
00651