00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058 #include "Channels/ChannelOpts.H"
00059 #include "Component/GlobalOpts.H"
00060 #include "Component/ModelManager.H"
00061 #include "Component/ModelOptionDef.H"
00062 #include "Component/OptionManager.H"
00063 #include "Devices/FrameGrabberConfigurator.H"
00064 #include "Devices/DeviceOpts.H"
00065 #include "GUI/XWinManaged.H"
00066 #include "Gist/FFN.H"
00067 #include "Gist/trainUtils.H"
00068 #include "Image/ColorOps.H"
00069 #include "Image/CutPaste.H"
00070 #include "Image/DrawOps.H"
00071 #include "Image/ImageCache.H"
00072 #include "Image/MathOps.H"
00073 #include "Image/MatrixOps.H"
00074 #include "Image/Pixels.H"
00075 #include "Image/Pixels.H"
00076 #include "Image/ShapeOps.H"
00077 #include "Image/Transforms.H"
00078 #include "Media/MPEGStream.H"
00079 #include "Media/MediaOpts.H"
00080 #include "Media/MediaSimEvents.H"
00081 #include "Neuro/GistEstimatorStd.H"
00082 #include "Neuro/GistEstimatorFFT.H"
00083 #include "Neuro/InferoTemporal.H"
00084 #include "Neuro/NeuroOpts.H"
00085 #include "Neuro/NeuroSimEvents.H"
00086 #include "Neuro/Retina.H"
00087 #include "Neuro/ShapeEstimator.H"
00088 #include "Neuro/ShapeEstimatorModes.H"
00089 #include "Neuro/SpatialMetrics.H"
00090 #include "Neuro/StdBrain.H"
00091 #include "Neuro/gistParams.H"
00092 #include "Raster/Raster.H"
00093 #include "SIFT/Histogram.H"
00094 #include "Transport/FrameIstream.H"
00095 #include "SIFT/Keypoint.H"
00096 #include "SIFT/VisualObject.H"
00097 #include "SIFT/VisualObjectDB.H"
00098 #include "Simulation/SimEventQueueConfigurator.H"
00099 #include "Util/Timer.H"
00100
00101
00102 #define NAVG 20
00103
00104 #define W_ASPECT_RATIO 320 // ideal minimum width for display
00105 #define H_ASPECT_RATIO 240 // ideal minimum height for display
00106
00107 rutz::shared_ptr<FeedForwardNetwork> ffn_place;
00108 Image<double> pcaIcaMatrix;
00109
00110 CloseButtonListener wList;
00111 XWinManaged *inputWin;
00112 XWinManaged *salWin;
00113 XWinManaged *gistWin;
00114
00115 int wDisp, hDisp, sDisp, scaleDisp;
00116 int wDispWin, hDispWin;
00117
00118
00119 int pcaW = 16, pcaH = 5;
00120 int winBarW = 5, winBarH = 25;
00121
00122
00123 void setupDispWin (int w, int h);
00124 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img, Image<float> gistImg,
00125 Image<float> gistPcaImg, Image<float> outHistImg);
00126 void processSalCue (Image<PixRGB<byte> > inputImg,
00127 nub::soft_ref<StdBrain> brain, Point2D<int> winner, int fNum,
00128 const Image<float>& semask, const std::string& selabel);
00129
00130
00131 int main(const int argc, const char **argv)
00132 {
00133 MYLOGVERB = LOG_INFO;
00134
00135
00136 ModelManager manager("Place Localization Model");
00137
00138
00139
00140 manager.allowOptions(OPTEXP_ALL & (~OPTEXP_SAVE));
00141
00142
00143
00144 nub::soft_ref<SimEventQueueConfigurator>
00145 seqc(new SimEventQueueConfigurator(manager));
00146 manager.addSubComponent(seqc);
00147
00148 nub::soft_ref<InputMPEGStream>
00149 ims(new InputMPEGStream(manager, "Input MPEG Stream", "InputMPEGStream"));
00150 manager.addSubComponent(ims);
00151
00152
00153 nub::soft_ref<FrameGrabberConfigurator>
00154 gbc(new FrameGrabberConfigurator(manager));
00155 manager.addSubComponent(gbc);
00156
00157 nub::soft_ref<StdBrain> brain(new StdBrain(manager));
00158 manager.addSubComponent(brain);
00159
00160 nub::ref<SpatialMetrics> metrics(new SpatialMetrics(manager));
00161 manager.addSubComponent(metrics);
00162
00163 manager.exportOptions(MC_RECURSE);
00164 metrics->setFOAradius(30);
00165 metrics->setFoveaRadius(30);
00166 manager.setOptionValString(&OPT_MaxNormType, "FancyOne");
00167 manager.setOptionValString(&OPT_UseRandom, "false");
00168
00169
00170 manager.setOptionValString(&OPT_ShapeEstimatorMode, "FeatureMap");
00171 manager.setOptionValString(&OPT_ShapeEstimatorSmoothMethod, "Chamfer");
00172
00173 manager.setOptionValString(&OPT_RawVisualCortexChans,"OIC");
00174 manager.setOptionValString(&OPT_IORtype, "Disc");
00175
00176
00177
00178
00179
00180 manager.setOptionValString(&OPT_InferoTemporalType,"Std");
00181 manager.setOptionValString(&OPT_AttentionObjRecog,"yes");
00182 manager.setOptionValString(&OPT_MatchObjects,"false");
00183
00184 REQUEST_OPTIONALIAS_NEURO(manager);
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194 if (manager.parseCommandLine(argc, argv, "<input.mpg/CAMERA> <input_train.txt>"
00195 "[output_directory] [index]",
00196 2, 4) == false)
00197 return(1);
00198
00199 nub::soft_ref<SimEventQueue> seq = seqc->getQ();
00200
00201
00202
00203 manager.setOptionValString(&OPT_InputMPEGStreamPreload, "true");
00204
00205
00206 int w; int h;
00207 nub::soft_ref<FrameIstream> gb ;
00208 std::string camera("CAMERA");
00209
00210
00211 if(!manager.getExtraArg(0).compare(camera))
00212 {
00213 gb = gbc->getFrameGrabber();
00214 if (gb.isInvalid())
00215 LFATAL("You need to select a frame grabber type via the "
00216 "--fg-type=XX command-line option for this program "
00217 "to be useful -- ABORT");
00218 w = gb->getWidth(); h = gb->getHeight();
00219 std::string dims = convertToString(Dims(w, h));
00220 manager.setOptionValString(&OPT_InputFrameDims, dims);
00221 LINFO("Camera");
00222
00223
00224 gb->startStream();
00225 }
00226 else
00227 {
00228 ims->setFileName(manager.getExtraArg(0));
00229
00230 Dims iDims = ims->peekDims();
00231 manager.setOptionValString(&OPT_InputFrameDims,
00232 convertToString(ims->peekDims()));
00233
00234 w = iDims.w() - 50 + 1; h = iDims.h();
00235 LINFO("Mpeg");
00236 }
00237
00238
00239
00240 LINFO("Frame w: %d, h: %d",w, h);
00241 setupDispWin(w, h);
00242
00243
00244 int fNumOffset = 0;
00245 if (manager.numExtraArgs() > 3)
00246 fNumOffset = manager.getExtraArgAs<int>(3);
00247
00248
00249 double rtdelay = 33.3667/1000.0;
00250 double fdelay = rtdelay*3;
00251
00252
00253 manager.start();
00254
00255
00256 LFATAL("FIXME");
00257 nub::soft_ref<GistEstimatorStd> ge;
00258
00259
00260
00261 SimTime prevstime = SimTime::ZERO(); int fNum = 0;
00262 Image< PixRGB<byte> > inputImg;
00263 Image< PixRGB<byte> > gistDispImg;
00264
00265
00266 FFNtrainInfo pcInfo(manager.getExtraArg(1));
00267
00268
00269
00270 ffn_place.reset(new FeedForwardNetwork());
00271 ffn_place->init3L(pcInfo.h1Name, pcInfo.h2Name, pcInfo.oName,
00272 pcInfo.redFeatSize, pcInfo.h1size, pcInfo.h2size,
00273 pcInfo.nOutput, 0.0, 0.0);
00274
00275
00276 pcaIcaMatrix = setupPcaIcaMatrix
00277 (pcInfo.trainFolder+pcInfo.evecFname,
00278 pcInfo.oriFeatSize, pcInfo.redFeatSize);
00279
00280
00281 Timer tim(1000000); uint64 t[NAVG]; float frate = 0.0f;
00282 while(true)
00283 {
00284
00285
00286 if (fNum == 0 ||
00287 (seq->now() - 0.5 * (prevstime - seq->now())).secs() - fNum * fdelay > fdelay)
00288 {
00289 tim.reset();
00290
00291
00292 if(!manager.getExtraArg(0).compare(camera))
00293 {
00294 inputImg = gb->readRGB();
00295
00296 }
00297 else
00298 {
00299 inputImg = ims->readRGB();
00300
00301
00302 inputImg = crop(inputImg, Rectangle::tlbrI(0, 25, h-1, 25 + w - 1));
00303 }
00304
00305 if (inputImg.initialized() == false) break;
00306
00307
00308 rutz::shared_ptr<SimEventInputFrame>
00309 e(new SimEventInputFrame(brain.get(), GenericFrame(inputImg), 0));
00310 seq->post(e);
00311 LINFO("new frame :%d\n",fNum);
00312
00313
00314 if (!ge.isInvalid())
00315 {
00316
00317
00318 Image<double> cgist = ge->getGist();
00319 Image<double> in = cgist;
00320 if(pcInfo.isPCA) in = matrixMult(pcaIcaMatrix, cgist);
00321
00322
00323 Image<double> out = ffn_place->run3L(in);
00324 rutz::shared_ptr<Histogram> resHist(new Histogram(pcInfo.nOutput));
00325
00326 for(uint i = 0; i < pcInfo.nOutput; i++)
00327 {
00328 LINFO("pl[%3d]: %.4f",i, out.getVal(i));
00329 resHist->addValue(i, out.getVal(i));
00330 }
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348 }
00349 else
00350 LINFO("Cannot compute gist without a Gist Estimator");
00351
00352
00353 t[fNum % NAVG] = tim.get();
00354 if (fNum % 5 == 0)
00355 {
00356 uint64 avg = 0ULL; for (int i = 0; i < NAVG; i ++) avg += t[i];
00357 frate = 1000000.0F / float(avg) * float(NAVG);
00358 printf("[%6d] Frame rate: %f fps -> %f ms/frame \n",fNum,frate, 1000.0/frate);
00359 }
00360
00361
00362 fNum++;
00363 }
00364
00365
00366 prevstime = seq->now();
00367 const SimStatus status = seq->evolve();
00368
00369
00370
00371 if (SeC<SimEventWTAwinner> e = seq->check<SimEventWTAwinner>(0))
00372 {
00373
00374
00375
00376 Image<float> fmask; std::string label;
00377 if (SeC<SimEventShapeEstimatorOutput>
00378 e = seq->check<SimEventShapeEstimatorOutput>(0))
00379 { fmask = e->smoothMask(); label = e->winningLabel(); }
00380
00381 }
00382
00383 if (SIM_BREAK == status)
00384 break;
00385 }
00386
00387
00388
00389
00390
00391
00392 manager.stop();
00393
00394
00395 return 0;
00396 }
00397
00398
00399
00400 void processSalCue(Image<PixRGB<byte> > inputImg,
00401 nub::soft_ref<StdBrain> brain, Point2D<int> winner, int fNum,
00402 const Image<float>& semask, const std::string& selabel)
00403 {
00404
00405 Image<float> roiImg;Image<PixRGB<byte> > objImg;
00406 if (semask.initialized())
00407 {
00408 float mn, mx; getMinMax(semask,mn,mx);
00409 Rectangle r = findBoundingRect(semask, mx*.05f);
00410 objImg = crop(inputImg, r);
00411 roiImg = semask * luminance(inputImg);
00412 }
00413 else
00414 {
00415 objImg = inputImg;
00416 roiImg = luminance(inputImg);
00417 }
00418
00419
00420 LFATAL("fixme");
00421 nub::soft_ref<VisualCortex> vc;
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438 drawCircle(roiImg, winner, 10, 0.0f, 1);
00439 drawPoint(roiImg, winner.i, winner.j, 0.0f);
00440 LINFO("\nFrame: %d, winner: (%d,%d) in %s\n\n",
00441 fNum, winner.i, winner.j, selabel.c_str());
00442 salWin->drawImage(roiImg,0,0);
00443 salWin->drawImage(objImg,inputImg.getWidth(),0);
00444 Raster::waitForKey();
00445
00446 }
00447
00448
00449
00450 void setupDispWin(int w, int h)
00451 {
00452
00453 inputWin = new XWinManaged(Dims(w, h), 2*w, 0, "Original Input Image" );
00454 wList.add(inputWin);
00455
00456
00457
00458
00459
00460 scaleDisp = 1;
00461 while (w*scaleDisp < W_ASPECT_RATIO*.75 && h*scaleDisp < H_ASPECT_RATIO*.75)
00462 scaleDisp++;
00463
00464
00465
00466 wDisp = w*scaleDisp; hDisp = h*scaleDisp;
00467 if(wDisp/(0.0 + W_ASPECT_RATIO) > hDisp/(0.0 + H_ASPECT_RATIO))
00468 hDisp = (int)(wDisp / (0.0 + W_ASPECT_RATIO) * H_ASPECT_RATIO)+1;
00469 else
00470 wDisp = (int)(hDisp / (0.0 + H_ASPECT_RATIO) * W_ASPECT_RATIO)+1;
00471
00472
00473 sDisp = (hDisp/NUM_GIST_FEAT + 1);
00474 hDisp = sDisp * NUM_GIST_FEAT;
00475
00476
00477 wDispWin = wDisp + sDisp * NUM_GIST_COL;
00478 hDispWin = hDisp + sDisp * pcaH * 2;
00479
00480 gistWin = new XWinManaged(Dims(wDispWin, hDispWin), 0, 0, "Gist Related");
00481 wList.add(gistWin);
00482
00483 salWin = new XWinManaged(Dims(2*w, h), 0, 2*h, "Saliency Related" );
00484 wList.add(salWin);
00485 }
00486
00487
00488
00489 Image< PixRGB<byte> > getGistDispImg (Image< PixRGB<byte> > img,
00490 Image<float> gistImg,
00491 Image<float> gistPcaImg,
00492 Image<float> outHistImg)
00493 {
00494 Image< PixRGB<byte> > gistDispImg(wDispWin, hDispWin, ZEROS);
00495 int w = img.getWidth(); int h = img.getHeight();
00496
00497
00498 Image< PixRGB<byte> > tImg = img;
00499 drawGrid(tImg, w/4,h/4,1,1,PixRGB<byte>(255,255,255));
00500 inplacePaste(gistDispImg, tImg, Point2D<int>(0, 0));
00501
00502
00503 inplaceNormalize(gistImg, 0.0f, 255.0f);
00504 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistImg), Point2D<int>(wDisp, 0));
00505
00506
00507 inplaceNormalize(gistPcaImg, 0.0f, 255.0f);
00508 inplacePaste(gistDispImg, Image<PixRGB<byte> >(gistPcaImg), Point2D<int>(wDisp, hDisp));
00509
00510
00511 inplaceNormalize(outHistImg, 0.0f, 255.0f);
00512 inplacePaste(gistDispImg, Image<PixRGB<byte> >(outHistImg), Point2D<int>(0, hDisp));
00513
00514
00515 drawLine(gistDispImg, Point2D<int>(0,hDisp),
00516 Point2D<int>(wDispWin,hDisp),
00517 PixRGB<byte>(255,255,255),1);
00518 drawLine(gistDispImg, Point2D<int>(wDisp-1,0),
00519 Point2D<int>(wDisp-1,hDispWin-1),
00520 PixRGB<byte>(255,255,255),1);
00521 return gistDispImg;
00522 }
00523
00524
00525
00526 Image< PixRGB<byte> > greyWorldNormalize(Image< PixRGB<byte> > img)
00527 {
00528 Image<byte> rImg;
00529 Image<byte> gImg;
00530 Image<byte> bImg;
00531
00532
00533
00534 double rMean = mean(rImg);
00535 double gMean = mean(gImg);
00536 double bMean = mean(bImg);
00537 printf("mean = [%f,%f,%f]\n",rMean, gMean, bMean);
00538
00539 Image<float> rtImg = (rImg * (128.0/rMean)) + .5;
00540 Image<float> gtImg = (gImg * (128.0/gMean)) + .5;
00541 Image<float> btImg = (bImg * (128.0/bMean)) + .5;
00542 inplaceClamp(rtImg, 0.0f,255.0f);
00543 inplaceClamp(gtImg, 0.0f,255.0f);
00544 inplaceClamp(btImg, 0.0f,255.0f);
00545
00546 Image< PixRGB <byte> > res = makeRGB(rtImg, gtImg, btImg);
00547 return res;
00548 }
00549
00550
00551
00552
00553
00554