neovision2-cuda.C

00001 /*!@file INVT/neovision2-cuda.C CUDA-accelerated Neovision2 integrated demo */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/neovision2-cuda.C $
00035 // $Id: neovision2-cuda.C 13232 2010-04-15 02:15:06Z dparks $
00036 //
00037 
00038 #include "Image/OpenCVUtil.H"  // must be first to avoid conflicting defs of int64, uint64
00039 
00040 #include "Component/GlobalOpts.H"
00041 #include "Component/ModelManager.H"
00042 #include "Component/ModelOptionDef.H"
00043 #include "Component/ModelParam.H"
00044 #include "Component/ModelParamBatch.H"
00045 #include "Devices/DeviceOpts.H"
00046 #include "Devices/IEEE1394grabber.H"
00047 #include "GUI/ImageDisplayStream.H"
00048 #include "GUI/PrefsWindow.H"
00049 #include "GUI/XWinManaged.H"
00050 #include "Image/ColorOps.H"
00051 #include "Image/CutPaste.H"
00052 #include "Image/DrawOps.H"
00053 #include "Image/FilterOps.H"
00054 #include "Image/Image.H"
00055 #include "Image/ImageSet.H"
00056 #include "Image/Layout.H"
00057 #include "Image/MathOps.H"
00058 #include "Image/Pixels.H"
00059 #include "Image/PyramidOps.H"
00060 #include "Image/ShapeOps.H"
00061 #include "Image/Transforms.H"
00062 #include "Media/FrameSeries.H"
00063 #include "Media/MediaOpts.H"
00064 #include "NeovisionII/Nv2LabelReader.H"
00065 #include "NeovisionII/nv2_common.h"
00066 #include "Neuro/NeoBrain.H"
00067 #include "Neuro/EnvInferoTemporal.H"
00068 #include "Neuro/EnvSaliencyMap.H"
00069 #include "Neuro/EnvSegmenterConfigurator.H"
00070 #include "Neuro/EnvVisualCortex.H"
00071 #include "Raster/GenericFrame.H"
00072 #include "Raster/Raster.H"
00073 #include "Transport/FrameInfo.H"
00074 #include "Transport/TransportOpts.H"
00075 #include "Util/FpsTimer.H"
00076 #include "Util/Pause.H"
00077 #include "Util/StringConversions.H"
00078 #include "Util/StringUtil.H"
00079 #include "Util/SyncJobServer.H"
00080 #include "Util/SysInfo.H"
00081 #include "Util/TextLog.H"
00082 #include "Util/WorkThreadServer.H"
00083 #include "Util/csignals.H"
00084 #include "rutz/shared_ptr.h"
00085 #include "rutz/trace.h"
00086 
00087 // conflict, both cudadefs.h and opencv define MIN and MAX
00088 #undef MIN
00089 #undef MAX
00090 
00091 #include "CUDA/CudaSaliency.H"
00092 
00093 #include <ctype.h>
00094 #include <deque>
00095 #include <iterator>
00096 #include <limits>
00097 #include <stdlib.h> // for atoi(), malloc(), free()
00098 #include <string.h>
00099 #include <sys/resource.h>
00100 #include <signal.h>
00101 #include <time.h>
00102 #include <vector>
00103 
// Text length handed to makeMultilineTextBox() and to
// EnvInferoTemporal::getLabeledImages() by the display code below.
static const size_t PREFERRED_TEXT_LENGTH = 42;
00105 
00106 // ######################################################################
00107 class EnvSimulationViewer : public ModelComponent
00108 {
00109 public:
00110   EnvSimulationViewer(OptionManager& mgr);
00111 
00112   virtual ~EnvSimulationViewer();
00113 
00114   virtual void paramChanged(ModelParamBase* const param, const bool valueChanged, ParamClient::ChangeStatus* status);
00115 
00116   bool shouldQuit() const { return itsDoQuit; }
00117 
00118   OModelParam<Dims> itsInputDims;
00119   OModelParam<size_t> optDispZoom;
00120   OModelParam<size_t> optInputReduce;
00121   OModelParam<std::string> optMainwinTitle;
00122   OModelParam<bool> itsSaveVcx;
00123   OModelParam<bool> itsSaveSm;
00124 
00125   bool itsDoQuit;
00126 };
00127 
00128 
00129 // ######################################################################
00130 static const ModelOptionDef OPT_DispZoom =
00131   { MODOPT_ARG(size_t), "EsvDispZoom", &MOC_OUTPUT, OPTEXP_CORE,
00132     "Number of octaves to zoom in on the small maps",
00133     "disp-zoom", '\0', "size_t", "4" };
00134 
00135 static const ModelOptionDef OPT_InputReduce =
00136   { MODOPT_ARG(size_t), "EsvInputReduce", &MOC_OUTPUT, OPTEXP_CORE,
00137     "Number of octaves to reduce the input by, for display purposes only",
00138     "input-reduce", '\0', "size_t", "0" };
00139 
00140 static const ModelOptionDef OPT_MainwinTitle =
00141   { MODOPT_ARG_STRING, "MainwinTitle", &MOC_OUTPUT, OPTEXP_CORE,
00142     "Title to use for main output window",
00143     "mainwin-title", '\0', "<string>", "neovision2" };
00144 
00145 static const ModelOptionDef OPT_SaveVcx =
00146   { MODOPT_FLAG, "SaveVcx", &MOC_OUTPUT, OPTEXP_CORE,
00147     "Whether to save the VisualCortex (VCX) output",
00148     "save-vcx", '\0', "", "false" };
00149 
00150 static const ModelOptionDef OPT_SaveSm =
00151   { MODOPT_FLAG, "SaveSm", &MOC_OUTPUT, OPTEXP_CORE,
00152     "Whether to save the SaliencyMap (Sm) output",
00153     "save-sm", '\0', "", "false" };
00154 
00155 // ######################################################################
00156 EnvSimulationViewer::EnvSimulationViewer(OptionManager& mgr) :
00157   ModelComponent(mgr, "Embeddable Simulation Viewer", "EnvSimulationViewer"),
00158   itsInputDims(&OPT_InputFrameDims, this),
00159   optDispZoom(&OPT_DispZoom, this, ALLOW_ONLINE_CHANGES),
00160   optInputReduce(&OPT_InputReduce, this, ALLOW_ONLINE_CHANGES),
00161   optMainwinTitle(&OPT_MainwinTitle, this),
00162   itsSaveVcx(&OPT_SaveVcx, this),
00163   itsSaveSm(&OPT_SaveSm, this),
00164   itsDoQuit(false)
00165 { }
00166 
00167 EnvSimulationViewer::~EnvSimulationViewer()
00168 { }
00169 
00170 void EnvSimulationViewer::paramChanged(ModelParamBase* const param, const bool valueChanged,
00171                                        ParamClient::ChangeStatus* status)
00172 {
00173   if (param == &itsInputDims) {
00174     const size_t excess_size = size_t(0.5 * log2(itsInputDims.getVal().sz() / 1000000.0));
00175     if (excess_size > optInputReduce.getVal()) optInputReduce.setVal(excess_size);
00176   } else if (param == &optInputReduce) {
00177     const size_t val = optInputReduce.getVal();
00178     const Dims d = itsInputDims.getVal();
00179 
00180     if (val > 16) *status = ParamClient::CHANGE_REJECTED;
00181     else if (d.isNonEmpty() && val > 0 && ((d.w() / (1 << val)) < 32 || (d.h() / (1 << val)) < 32))
00182       *status = ParamClient::CHANGE_REJECTED;
00183   }
00184 }
00185 
00186 // ######################################################################
00187 struct Nv2UiData
00188 {
00189   Nv2UiData(const int map_zoom_) :
00190     accepted_training_label(),
00191     remote_command(),
00192     map_zoom(map_zoom_),
00193     targetLoc(-1,-1),
00194     ncpu(numCpus()),
00195     text_log_file("")
00196   { }
00197 
00198   FpsTimer::State time_state;
00199   std::string accepted_training_label;
00200   std::string remote_command;
00201   const int map_zoom;
00202   Point2D<int> targetLoc;
00203   const int ncpu;
00204   std::string text_log_file;
00205 };
00206 
00207 // ######################################################################
00208 class Nv2UiJob : public JobServer::Job
00209 {
00210 public:
00211   Nv2UiJob(OutputFrameSeries* ofs_, EnvSimulationViewer* esv_, EnvInferoTemporal* eit_,
00212            const Nv2UiData& uidata_, EnvSaliencyMap* sm_, EnvSegmenter* ese_,
00213            NeoBrain* nb_, Image<PixRGB<byte> > rgbin_, Image<byte> vcxmap_,
00214            Image<byte> Imap_, Image<byte> Cmap_, Image<byte> Omap_, Image<byte> Fmap_, Image<byte> Mmap_) :
00215     ofs(ofs_), esv(esv_), eit(eit_), uidata(uidata_), sm(sm_), ese(ese_), neoBrain(nb_), rgbin(rgbin_),
00216     vcxmap(vcxmap_), Imap(Imap_), Cmap(Cmap_), Omap(Omap_), Fmap(Fmap_), Mmap(Mmap_),
00217     m_dispzoom(1 << esv->optDispZoom.getVal()), m_inputreduce(esv->optInputReduce.getVal())
00218   { }
00219 
00220   // ####################
00221   unsigned int getHalfZoom() const
00222   {
00223     const int div = 4;
00224     return std::max(size_t(1), m_dispzoom/div);
00225   }
00226 
00227   // ####################
00228   Layout<PixRGB<byte> > makeInputMarkup(const Rectangle& foa, const Image<byte>& foamask,
00229                                         const EnvSaliencyMap::State& smstate, const uint32_t patch_id) const
00230   {
00231     Image<PixRGB<byte> > markup = rgbin;
00232 
00233     if (foa.isValid()) drawRectSquareCorners(markup, foa, PixRGB<byte>(255, 255, 0), 3 << m_inputreduce);
00234 
00235     if (uidata.targetLoc.isValid())
00236       drawCircle(markup, uidata.targetLoc, 3, PixRGB<byte>(60, 220, 255), 3 << m_inputreduce);
00237 
00238     // draw the first most salient loc:
00239     drawRectSquareCorners(markup,
00240                           Rectangle(smstate.fullres_maxpos - uidata.map_zoom/2, Dims(uidata.map_zoom, uidata.map_zoom)),
00241                           PixRGB<byte>(255, 0, 0), 3 << m_inputreduce);
00242 
00243     // draw the next n most salient loc:
00244     for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i)
00245       {
00246         const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i];
00247         drawRectSquareCorners(markup, Rectangle(locInfo.fullres_maxpos - uidata.map_zoom/2,
00248                                                 Dims(uidata.map_zoom, uidata.map_zoom)),
00249                               PixRGB<byte>(150, 0, 0), 3 << m_inputreduce);
00250       }
00251 
00252     for (size_t i = 0; i < m_inputreduce; ++i) markup = decXY(markup);
00253 
00254     if (foamask.initialized()) drawContour2D(rescaleNI(foamask, markup.getDims()), markup, PixRGB<byte>(0,255,0), 2);
00255 
00256     const std::string lines[2] = {
00257       sformat("peak %3d in %3dx%3d foa @ (%3d,%3d)", int(smstate.maxval),
00258               foa.isValid() ? foa.width() : -1, foa.isValid() ? foa.height() : -1,
00259               smstate.fullres_maxpos.i, smstate.fullres_maxpos.j),
00260         sformat("%s #%06u [%5.2ffps, %5.1f%%CPU]", convertToString(uidata.time_state.elapsed_time).c_str(),
00261                 (unsigned int) patch_id, uidata.time_state.recent_fps, uidata.time_state.recent_cpu_usage*100.0)
00262     };
00263 
00264     const Image<PixRGB<byte> > textarea =
00265       makeMultilineTextBox(markup.getWidth(), &lines[0], 2,
00266                            PixRGB<byte>(255, 255, 0), PixRGB<byte>(0,0,0), PREFERRED_TEXT_LENGTH);
00267 
00268     return vcat(markup, textarea);
00269   }
00270 
00271   // ####################
00272   Layout<PixRGB<byte> >
00273   makeSalmapMarkup(const EnvSaliencyMap::State& smstate) const
00274   {
00275     Image<PixRGB<byte> > zoomedsm = zoomXY(smstate.salmap, m_dispzoom, m_dispzoom);
00276 
00277     // draw the first most salient loc:
00278     drawRectSquareCorners(zoomedsm, Rectangle(smstate.lowres_maxpos * m_dispzoom, Dims(m_dispzoom, m_dispzoom)),
00279                           PixRGB<byte>(255, 0, 0), 3);
00280 
00281     // draw the next n most salient locs:
00282     for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i)
00283       {
00284         const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i];
00285         drawRectSquareCorners(zoomedsm, Rectangle(locInfo.lowres_maxpos * m_dispzoom, Dims(m_dispzoom, m_dispzoom)),
00286                               PixRGB<byte>(150, 0, 0), 3);
00287       }
00288 
00289     const std::string valstring = sformat("%d", int(smstate.maxval));
00290 
00291     const SimpleFont font = SimpleFont::fixedMaxWidth(zoomedsm.getWidth() / 30);
00292 
00293     Point2D<int> textpos = smstate.lowres_maxpos * m_dispzoom;
00294     textpos.j -= font.h() + 2; if (textpos.j < 0) textpos.j += m_dispzoom + 2;
00295 
00296     writeText(zoomedsm, textpos, valstring.c_str(),
00297               PixRGB<byte>(255, 0, 0), PixRGB<byte>(0, 0, 0), font, true);
00298 
00299     Image<PixRGB<byte> > histo =
00300       neoBrain->getSaliencyHisto(Dims(zoomedsm.getWidth(), 62), PixRGB<byte>(0,0,0), PixRGB<byte>(180,180,180));
00301     return vcat(zoomedsm, histo);
00302   }
00303 
00304   // ####################
00305   Layout<PixRGB<byte> > makeCmapsMarkup() const
00306   {
00307     unsigned int halfzoom = this->getHalfZoom() / 2;
00308 
00309     Image<PixRGB<byte> > cmaps[] = {
00310       zoomXY(Imap, halfzoom, halfzoom),
00311       zoomXY(Cmap, halfzoom, halfzoom),
00312       zoomXY(Omap, halfzoom, halfzoom),
00313       zoomXY(Fmap, halfzoom, halfzoom),
00314       zoomXY(Mmap, halfzoom, halfzoom),
00315       zoomXY(vcxmap, halfzoom, halfzoom)
00316     };
00317 
00318     const char* labels[] = { "I", "C", "O", "F", "M", "VC" };
00319 
00320     for (size_t i = 0; i < sizeof(labels) / sizeof(labels[0]); ++i) {
00321       const SimpleFont font = SimpleFont::fixedMaxWidth(cmaps[i].getWidth() / 20);
00322       writeText(cmaps[i], Point2D<int>(1,1), labels[i], PixRGB<byte>(0), PixRGB<byte>(255), font);
00323       drawLine(cmaps[i], Point2D<int>(0,0), Point2D<int>(cmaps[i].getWidth()-1,0), PixRGB<byte>(255), 1);
00324       drawLine(cmaps[i], Point2D<int>(0,0), Point2D<int>(0,cmaps[i].getHeight()-1), PixRGB<byte>(255), 1);
00325     }
00326 
00327     const size_t nrows = 2;
00328 
00329     return arrcat(&cmaps[0], sizeof(cmaps) / sizeof(cmaps[0]), (sizeof(cmaps) / sizeof(cmaps[0]) + (nrows-1)) / nrows);
00330   }
00331 
00332   // ####################
00333   Image<PixRGB<byte> > makeInhibitionMarkup() const
00334   {
00335     Image<byte> inh = sm->getInhibmap();
00336     if (!inh.initialized()) inh = Image<byte>(vcxmap.getDims(), ZEROS);
00337 
00338     Image<byte> inr = Image<byte>(sm->getInertiaMap());
00339     if (!inr.initialized()) inr = Image<byte>(vcxmap.getDims(), ZEROS);
00340 
00341     Image<PixRGB<byte> > rgb(vcxmap.getDims(), NO_INIT);
00342     Image<PixRGB<byte> >::iterator aptr = rgb.beginw();
00343     Image<PixRGB<byte> >::iterator stop = rgb.endw();
00344 
00345     Image<byte>::const_iterator rptr = inh.begin();
00346     Image<byte>::const_iterator gptr = inr.begin();
00347 
00348     while (aptr != stop) *aptr++ = PixRGB<byte>(*rptr++, *gptr++, 0);
00349 
00350     return zoomXY(rgb, getHalfZoom() / 2);
00351   }
00352 
00353   // ####################
00354   Image<PixRGB<byte> > makeMeters(const size_t nx, const Dims& meterdims) const
00355   {
00356     const double maxcpu = uidata.ncpu <= 0 ? 100.0 : uidata.ncpu * 100.0;
00357 
00358     const double nothresh = std::numeric_limits<double>::max();
00359 
00360     const MeterInfo infos[] = {
00361       { "dvcx/dt", sm->getVcxFlicker(), 1.0, nothresh, PixRGB<byte>(0, 255, 0) },
00362       { "dfactor", sm->getDynamicFactor(), 1.0, nothresh, PixRGB<byte>(128, 0, 255) },
00363       { "boringness", neoBrain->getBoringness(), 128.0, nothresh, PixRGB<byte>(192, 255, 0) },
00364       { "excitement", neoBrain->getExcitementLevel(), 256.0, nothresh, PixRGB<byte>(255, 0, 32) },
00365       { "sleepiness", neoBrain->getSleepLevel(), 1000.0, nothresh, PixRGB<byte>(255, 0, 32) },
00366       { "confidence", eit->getMaxConfidence(), 1.0, eit->getConfidenceThresh(), PixRGB<byte>(0, 255, 128) },
00367       { "cpu%", uidata.time_state.recent_cpu_usage*100.0, maxcpu, nothresh, PixRGB<byte>(255, 165, 0) },
00368       { "fps", uidata.time_state.recent_fps, 60.0, nothresh, PixRGB<byte>(0, 128, 255) }
00369     };
00370 
00371     return drawMeters(&infos[0], sizeof(infos) / sizeof(infos[0]), nx, meterdims);
00372   }
00373 
00374   // ####################
00375   virtual void run()
00376   {
00377     Point2D<int> scaled_maxpos(-1,-1);
00378 
00379     const nub::soft_ref<ImageDisplayStream> ids = ofs->findFrameDestType<ImageDisplayStream>();
00380 
00381     const rutz::shared_ptr<XWinManaged> uiwin = ids.is_valid() ?
00382       ids->getWindow(esv->optMainwinTitle.getVal()) : rutz::shared_ptr<XWinManaged>();
00383 
00384     Point2D<int> forceTrackLocation(-1,-1);
00385 
00386     if (uiwin.is_valid()) {
00387       XButtonEvent ev;
00388       if (uiwin->getLastButtonEvent(&ev) && ev.button == 1) forceTrackLocation = Point2D<int>(ev.x, ev.y);
00389      }
00390 
00391     if (forceTrackLocation.isValid()) {
00392       const Point2D<int> candidate = forceTrackLocation * (1 << m_inputreduce) + (1 << m_inputreduce) / 2;
00393 
00394       if (rgbin.coordsOk(candidate)) {
00395         scaled_maxpos = candidate;
00396         neoBrain->setTarget(scaled_maxpos, rgbin, -1);
00397         neoBrain->setKeepTracking(true);
00398       }
00399     } else if (uidata.targetLoc.isValid()) {
00400       scaled_maxpos = uidata.targetLoc;
00401       ASSERT(rgbin.coordsOk(scaled_maxpos));
00402     }
00403 
00404     const EnvSaliencyMap::State smstate = sm->getSalmap(vcxmap, scaled_maxpos);
00405 
00406     neoBrain->updateBoringness(smstate.salmap, smstate.maxval);
00407     neoBrain->updateExcitement(sm->getVcxFlicker());
00408 
00409     Image<byte> foamask;
00410     Image<PixRGB<byte> > segmentdisp;
00411 
00412     // Send the first most salient locations to be identified
00413     const Rectangle foa = ese->getFoa(rgbin, smstate.fullres_maxpos, &foamask, &segmentdisp);
00414 
00415     if (foa.isValid()) {
00416       const Point2D<int> objCenter = Point2D<int>(foa.topLeft().i + foa.width()/2, foa.topLeft().j + foa.height()/2);
00417       neoBrain->setTarget(objCenter, rgbin, smstate.maxval);
00418     } else if (!uidata.targetLoc.isValid())
00419       neoBrain->setTarget(smstate.fullres_maxpos, rgbin, smstate.maxval);
00420 
00421     const uint32_t patch_id = uidata.time_state.frame_number;
00422     LINFO("Sendind attended patch at (%d,%d) to EIT", foa.topLeft().i + foa.width()/2, foa.topLeft().j + foa.height()/2);
00423     eit->sendPatch(patch_id, rgbin, foa,
00424                    uidata.time_state.elapsed_time,
00425                    uidata.accepted_training_label.length() > 0,
00426                    uidata.accepted_training_label,
00427                    uidata.remote_command,
00428                    smstate.fullres_maxpos);
00429 
00430     // Send the next N most salient locations to be identified:
00431     for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) {
00432       Image<byte> nextFoamask;
00433       Image<PixRGB<byte> > nextSegmentdisp;
00434 
00435       const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i];
00436       const Rectangle nextFoa = ese->getFoa(rgbin, locInfo.fullres_maxpos, &nextFoamask, &nextSegmentdisp);
00437       LINFO("Sendind attended patch at (%d,%d) to EIT", nextFoa.topLeft().i + nextFoa.width()/2, nextFoa.topLeft().j + nextFoa.height()/2);
00438 
00439       eit->sendPatch(patch_id + 1000000*i /* use different IDs for different patches*/, rgbin, nextFoa,
00440                      uidata.time_state.elapsed_time,
00441                      uidata.accepted_training_label.length() > 0,
00442                      uidata.accepted_training_label,
00443                      uidata.remote_command,
00444                      locInfo.fullres_maxpos);
00445     }
00446 
00447     // log various bits of info (these calls will do nothing if the log filename is empty):
00448     textLog(uidata.text_log_file, "FOAbox", convertToString(foa));
00449 
00450     const FrameState os = ofs->updateNext();
00451 
00452     // save maps if requested:
00453     if (esv->itsSaveVcx.getVal()) ofs->writeGray(vcxmap, "VCO", FrameInfo("VisualCortex output map", SRC_POS));
00454 
00455     if (esv->itsSaveSm.getVal()) ofs->writeGray(smstate.salmap, "SM", FrameInfo("SaliencyMap output map", SRC_POS));
00456 
00457     // ##### compact displays
00458     Layout<PixRGB<byte> > img;
00459 
00460     // let's start with an HD display of the input + markups:
00461     Image<PixRGB<byte> > markup = rgbin;
00462     if (foa.isValid()) drawRectSquareCorners(markup, foa, PixRGB<byte>(255, 255, 0), 3 << m_inputreduce);
00463 
00464     if (uidata.targetLoc.isValid())
00465       drawCircle(markup, uidata.targetLoc, 3, PixRGB<byte>(60, 220, 255), 3 << m_inputreduce);
00466 
00467     // draw the first most salient loc
00468     drawRectSquareCorners(markup,
00469                           Rectangle(smstate.fullres_maxpos - uidata.map_zoom/2,
00470                                     Dims(uidata.map_zoom, uidata.map_zoom)),
00471                           PixRGB<byte>(255, 0, 0), 3 << m_inputreduce);
00472 
00473     // draw the next n most salient locs:
00474     for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) {
00475       const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i];
00476       drawRectSquareCorners(markup,
00477                             Rectangle(locInfo.fullres_maxpos - uidata.map_zoom/2,
00478                               Dims(uidata.map_zoom, uidata.map_zoom)), PixRGB<byte>(150, 0, 0), 3 << m_inputreduce);
00479     }
00480 
00481     for (size_t i = 0; i < m_inputreduce; ++i) markup = decXY(markup);
00482 
00483     if (foamask.initialized()) drawContour2D(rescaleNI(foamask,markup.getDims()), markup, PixRGB<byte>(0,255,0), 2);
00484 
00485     // that's it for this window, let's send it out to display:
00486     ofs->writeRGB(markup, esv->optMainwinTitle.getVal(), FrameInfo("copy of input", SRC_POS));
00487 
00488     // the salmap:
00489     const unsigned int halfzoom = 8;
00490     Image<PixRGB<byte> > zoomedsm = zoomXY(smstate.salmap, halfzoom, halfzoom);
00491 
00492     drawRectSquareCorners(zoomedsm, Rectangle(smstate.lowres_maxpos * halfzoom, Dims(halfzoom, halfzoom)),
00493                           PixRGB<byte>(255, 0, 0), 3);
00494 
00495     const std::string valstring = sformat("%d", int(smstate.maxval));
00496     const SimpleFont font = SimpleFont::fixedMaxWidth(zoomedsm.getWidth() / 30);
00497     Point2D<int> textpos = smstate.lowres_maxpos * halfzoom;
00498     textpos.j -= font.h() + 2; if (textpos.j < 0) textpos.j += halfzoom + 2;
00499     writeText(zoomedsm, textpos, valstring.c_str(), PixRGB<byte>(255, 0, 0), PixRGB<byte>(0, 0, 0), font, true);
00500 
00501     Image<PixRGB<byte> > inh = this->makeInhibitionMarkup();
00502     drawLine(inh, Point2D<int>(0,0), Point2D<int>(inh.getWidth()-1,0), PixRGB<byte>(255, 255, 255), 1);
00503     drawLine(inh, Point2D<int>(0,0), Point2D<int>(0,inh.getHeight()-1), PixRGB<byte>(255, 255, 255), 1);
00504 
00505     if (!segmentdisp.initialized())
00506       segmentdisp = Image<PixRGB<byte> >(inh.getDims(), ZEROS);
00507     else {
00508       segmentdisp = rescaleNI(segmentdisp, inh.getDims());
00509       drawContour2D(rescaleNI(foamask, inh.getDims()), segmentdisp, PixRGB<byte>(0,255,0), 2);
00510     }
00511     drawLine(segmentdisp, Point2D<int>(0,0), Point2D<int>(inh.getWidth()-1,0),
00512              PixRGB<byte>(255, 255, 255), 1);
00513     drawLine(segmentdisp, Point2D<int>(0,0), Point2D<int>(0,inh.getHeight()-1),
00514              PixRGB<byte>(255, 255, 255), 1);
00515     Layout<PixRGB<byte> > inl = vcat(inh, segmentdisp);
00516 
00517     img = vcat(zoomedsm, this->makeMeters(2, Dims(zoomedsm.getDims().w() / 2, 13)));
00518 
00519     // now some info:
00520     const std::string lines[1] =
00521       {
00522         sformat("peak %3d in %3dx%3d foa @ (%4d,%4d) %04dx%04d %s #%06u [%3.2ffps, %4.1f%%CPU]",
00523                 int(smstate.maxval),
00524                 foa.isValid() ? foa.width() : -1,
00525                 foa.isValid() ? foa.height() : -1,
00526                 smstate.fullres_maxpos.i,
00527                 smstate.fullres_maxpos.j,
00528                 rgbin.getWidth(), rgbin.getHeight(),
00529                 convertToString(uidata.time_state.elapsed_time).c_str(),
00530                 (unsigned int) patch_id,
00531                 uidata.time_state.recent_fps,
00532                 uidata.time_state.recent_cpu_usage*100.0)
00533       };
00534 
00535     const Image<PixRGB<byte> > textarea =
00536       makeMultilineTextBox(img.getWidth(), &lines[0], 1, PixRGB<byte>(255, 255, 0), PixRGB<byte>(0,0,0),
00537                            PREFERRED_TEXT_LENGTH, 10);
00538     img = vcat(img, textarea);
00539 
00540     // now the cmaps and friends:
00541     const Layout<PixRGB<byte> > cmaps = this->makeCmapsMarkup();
00542     inl = hcat(cmaps, inl);
00543     img = vcat(img, inl);
00544 
00545     ofs->writeRgbLayout(img, "neovision2 maps", FrameInfo("copy of input", SRC_POS));
00546 
00547     std::vector<Nv2LabelReader::LabeledImage> images = eit->getLabeledImages(PREFERRED_TEXT_LENGTH);
00548 
00549     for (size_t i = 0; i < images.size(); ++i)
00550       {
00551         ofs->writeRGB(images[i].img, images[i].ident, FrameInfo("object-labeled image", SRC_POS));
00552         neoBrain->sayObjectLabel(images[i].label, /*confidence = */ 0, true);
00553       }
00554 
00555     if (os == FRAME_FINAL) esv->itsDoQuit = true;
00556   }
00557 
00558   // ####################
00559   virtual const char* jobType() const { return "Nv2UiJob"; }
00560 
00561 private:
00562   OutputFrameSeries* const ofs;
00563   EnvSimulationViewer* const esv;
00564   EnvInferoTemporal* const eit;
00565   const Nv2UiData uidata;
00566   EnvSaliencyMap* const sm;
00567   EnvSegmenter* const ese;
00568   NeoBrain* const neoBrain;
00569   Image<PixRGB<byte> > rgbin;
00570   const Image<byte> vcxmap;
00571   const Image<byte> Imap;
00572   const Image<byte> Cmap;
00573   const Image<byte> Omap;
00574   const Image<byte> Fmap;
00575   const Image<byte> Mmap;
00576   const size_t m_dispzoom;
00577   const size_t m_inputreduce;
00578 };
00579 
00580 // ######################################################################
00581 static const ModelOptionDef OPT_WithObjrecMode =
00582   { MODOPT_FLAG, "WithObjrecMode", &MOC_OUTPUT, OPTEXP_CORE,
00583     "Whether to include an 'objrec' mode which toggles parameters "
00584     "to values suitable for object recognition training.",
00585     "with-objrec-mode", '\0', "", "true" };
00586 
00587 static const ModelOptionDef OPT_ALIASHDDemo =
00588   { MODOPT_ALIAS, "ALIASHDDemo", &MOC_ALIAS, OPTEXP_CORE,
00589     "Set parameters for the hd camera on ilab24",
00590     "hd-demo", '\0', "",
00591     "--in=XC "
00592     "--framegrabber-dims=1920x1080 "
00593     "--patch-reader=192.168.0.229:9930 "
00594     "--disp-zoom=3 "
00595     "--with-objrec-mode "
00596     "--evc-multithreaded "
00597   };
00598 
00599 // ######################################################################
00600 int submain(int argc, const char** argv)
00601 {
00602   volatile int signum = 0;
00603   signal(SIGPIPE, SIG_IGN);
00604   catchsignals(&signum);
00605 
00606   // Instantiate our various ModelComponents:
00607 
00608   ModelManager manager("Nv2");
00609 
00610   OModelParam<bool> optWithObjrecMode(&OPT_WithObjrecMode, &manager);
00611   OModelParam<std::string> optTextLogFile(&OPT_TextLogFile, &manager);
00612 
00613   nub::ref<EnvSimulationViewer> esv(new EnvSimulationViewer(manager));
00614   manager.addSubComponent(esv);
00615 
00616   nub::ref<InputFrameSeries> ifs(new InputFrameSeries(manager));
00617   manager.addSubComponent(ifs);
00618 
00619   nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager));
00620   manager.addSubComponent(ofs);
00621 
00622   nub::ref<CudaSaliency> cus(new CudaSaliency(manager));
00623   manager.addSubComponent(cus);
00624 
00625   nub::ref<EnvSaliencyMap> esm(new EnvSaliencyMap(manager));
00626   manager.addSubComponent(esm);
00627 
00628   nub::ref<EnvSegmenterConfigurator> esec(new EnvSegmenterConfigurator(manager));
00629   manager.addSubComponent(esec);
00630 
00631   nub::ref<EnvInferoTemporal> eit(new EnvInferoTemporal(manager));
00632   manager.addSubComponent(eit);
00633 
00634   nub::ref<NeoBrain> neoBrain(new NeoBrain(manager));
00635   manager.addSubComponent(neoBrain);
00636 
00637   manager.requestOptionAlias(&OPT_ALIASHDDemo);
00638 
00639   manager.exportOptions(MC_RECURSE);
00640 
00641 #if defined(HAVE_IEEE1394)
00642   // input comes from firewire camera 640x480/rgb/15fps by default
00643   manager.setOptionValString(&OPT_InputFrameSource, "ieee1394");
00644   manager.setOptionValString(&OPT_FrameGrabberMode, "RGB24");
00645   manager.setOptionValString(&OPT_FrameGrabberDims, "640x480");
00646   manager.setOptionValString(&OPT_FrameGrabberFPS, "15");
00647 #elif defined(HAVE_QUICKTIME_QUICKTIME_H)
00648   manager.setOptionValString(&OPT_InputFrameSource, "qtgrab");
00649   manager.setOptionValString(&OPT_FrameGrabberDims, "640x480");
00650 #endif
00651 
00652   // output goes to the screen by default
00653   manager.setOptionValString(&OPT_OutputFrameSink, "display");
00654 
00655   // change some default values
00656   manager.setOptionValString(&OPT_EsmInertiaHalfLife, "60");
00657   manager.setOptionValString(&OPT_EsmIorStrength, "8.0");
00658 
00659   if (manager.parseCommandLine(argc, argv, "<ip1:port1,ip2:port2,...>", 0, 1) == false) return(1);
00660 
00661   eit->initReaders(manager.numExtraArgs() > 0 ? manager.getExtraArg(0) : "");
00662 
00663   manager.start();
00664 
00665   neoBrain->init(ifs->peekDims());
00666 
00667   Nv2UiData uidata(1 << /* evc->getMapLevel()*/ 4);
00668   uidata.text_log_file = optTextLogFile.getVal();
00669 
00670   PrefsWindow pwin("control panel", SimpleFont::FIXED(8));
00671   pwin.setValueNumChars(16);
00672 
00673   pwin.addPrefsForComponent(esv.get());
00674   pwin.addPrefsForComponent(esm.get());
00675   pwin.addPrefsForComponent(esec->getSeg().get());
00676   pwin.addPrefsForComponent(eit.get());
00677   pwin.addPrefsForComponent(neoBrain.get(), true);
00678 
00679   PrefItemBln prefPause(&pwin, "pause", false);
00680   PrefItemStr prefRemoteCommand(&pwin, "remote command", uidata.remote_command);
00681   PrefItemBln prefInTrainingMode(&pwin, "in training mode", false);
00682   PrefItemBln prefInObjRecMode(optWithObjrecMode.getVal() ? &pwin : 0, "in ObjRec mode", false);
00683   PrefItemBln prefDoGrabFrame(&pwin, "grab frame", true);
00684   PrefItemBln prefCommitTrainingImage(&pwin, "commit training image", false);
00685   PrefItemBln prefCommitTrainingImageConfirm(&pwin, "confirm commit ??", false);
00686   PrefItemStr prefTrainingLabel(&pwin, "training label", "");
00687   PrefItemByt prefFontSize(&pwin, "font size", 6);
00688 
00689   PrefsWindow inputprefs;
00690   inputprefs.addPrefsForComponent(ifs->getFrameSource().get());
00691   pwin.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get()));
00692   inputprefs.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get()));
00693 
00694   PauseWaiter p;
00695 
00696   int retval = 0;
00697 
00698   rutz::shared_ptr<JobServer> uiq;
00699   // set up a background job server with one worker thread to
00700   // handle the ui jobs:
00701   rutz::shared_ptr<WorkThreadServer> tsrv(new WorkThreadServer("neovision2-ui", 1));
00702 
00703   // keep max latency low, and if we get bogged down, then drop
00704   // old frames rather than new ones
00705   tsrv->setMaxQueueSize(2);
00706   tsrv->setDropPolicy(WorkThreadServer::DROP_OLDEST);
00707   tsrv->setFlushBeforeStopping(false);
00708   uiq = tsrv;
00709 
00710   ASSERT(uiq.get() != 0);
00711 
00712   ifs->startStream();
00713 
00714   const GenericFrameSpec fspec = ifs->peekFrameSpec();
00715 
00716   FpsTimer fps_timer;
00717 
00718   bool previous_training_mode = prefInTrainingMode.get();
00719   bool previous_do_fixed = esm->getUseFixed();
00720   Image<PixRGB<byte> > rgbin_last;
00721 
00722   ModelParamBatch objrecParams;
00723   objrecParams.addParamValue("EseDynamicFoa", false);
00724   objrecParams.addParamValue("EseFoaSize", 80);
00725   objrecParams.addParamValue("NeobrainBoringnessThresh", 2000);
00726   objrecParams.addParamValue("NeobrainTargetFramesThresh", (unsigned long) 2000);
00727   objrecParams.addParamValue("NeobrainNoMoveFramesThresh", (unsigned long) 2000);
00728 
00729   bool previous_objrec_mode = prefInObjRecMode.get();
00730 
00731   while (true)
00732     {
00733       if (signum != 0) {
00734           LINFO("quitting because %s was caught", signame(signum));
00735           retval = -1;
00736           break;
00737       }
00738 
00739       if (ofs->becameVoid()) {
00740         LINFO("quitting because output stream was closed or became void");
00741         break;
00742       }
00743 
00744       if (esv->shouldQuit()) break;
00745 
00746       //
00747       // update preferences window and uidata
00748       //
00749 
00750       pwin.update(); // handle pending preference window events
00751 
00752       setPause(prefPause.get());
00753       uidata.remote_command = prefRemoteCommand.get();
00754 
00755       prefCommitTrainingImage.setDisabled(!prefInTrainingMode.get());
00756       prefCommitTrainingImageConfirm.setDisabled(!prefInTrainingMode.get());
00757       prefTrainingLabel.setDisabled(!prefInTrainingMode.get());
00758 
00759       pwin.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get()));
00760 
00761       inputprefs.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get()));
00762       inputprefs.update();
00763 
00764       if (prefInObjRecMode.get()) {
00765         if (!previous_objrec_mode) objrecParams.installValues(&manager); // save previous values
00766       } else {
00767         if (previous_objrec_mode) objrecParams.restoreValues(&manager); //restore values
00768       }
00769 
00770       previous_objrec_mode = prefInObjRecMode.get();
00771 
00772       // This code enforces the "training mode" logic
00773       //  .. i.e., certain combinations of preferences are not possible.
00774       uidata.accepted_training_label = "";
00775 
00776       if (prefInTrainingMode.get()) {
00777         if (!previous_training_mode) previous_do_fixed = esm->getUseFixed();
00778 
00779           esm->setUseFixed(true);
00780 
00781           if (prefCommitTrainingImageConfirm.get()) {
00782             if (!prefCommitTrainingImage.get())
00783               prefCommitTrainingImageConfirm.set(false);
00784             else if (prefTrainingLabel.get().length() <= 3) {
00785               prefCommitTrainingImage.set(false);
00786               prefCommitTrainingImageConfirm.set(false);
00787               prefTrainingLabel.set("");
00788 
00789               LERROR("invalid training label %s (too short)", prefTrainingLabel.get().c_str());
00790             } else {
00791               // OK, we accept the training label as a valid one
00792               // and send it off to the labelers:
00793               uidata.accepted_training_label = prefTrainingLabel.get();
00794             }
00795           }
00796       } else {
00797         // training mode is off, certain settings not possible
00798         prefDoGrabFrame.set(true);
00799         prefCommitTrainingImage.set(false);
00800         prefCommitTrainingImageConfirm.set(false);
00801         prefTrainingLabel.set("");
00802 
00803         // this just handles unfixing window when training is first toggled off
00804         if (previous_training_mode) esm->setUseFixed(previous_do_fixed);
00805       }
00806 
00807       previous_training_mode = prefInTrainingMode.get();
00808 
00809       if (p.checkPause()) continue;
00810 
00811       //
00812       // get the next frame from our input source
00813       //
00814 
00815       const FrameState is = ifs->updateNext();
00816       if (is == FRAME_COMPLETE) break;
00817 
00818       GenericFrame input = ifs->readFrame();
00819       if (!input.initialized()) break;
00820 
00821       // only read in from camera if do_grab_frame
00822       const Image<PixRGB<byte> > rgbin = prefDoGrabFrame.get() ? input.asRgb() : rgbin_last;
00823 
00824       rgbin_last = rgbin;
00825 
00826       if (eit->belowConfidenceThresh()) uidata.targetLoc = neoBrain->trackObject(rgbin);
00827       else uidata.targetLoc = Point2D<int>(-1,-1);
00828 
00829       //
00830       // send the frame to the EnvVisualCortex and get the vcx output
00831       //
00832 
00833       cus->doInput(rgbin);
00834 
00835       fps_timer.nextFrame();
00836       uidata.time_state = fps_timer.getState();
00837 
00838       if (uidata.time_state.frame_number % 50 == 0)
00839         LINFO("frame %u: %.2f fps", uidata.time_state.frame_number, uidata.time_state.recent_fps);
00840 
00841       const Image<byte> vcxmap = cus->getOutput() * 5.0F;
00842 
00843       //
00844       // build a ui job to run in the background to display update the
00845       // saliency map the input frame, the vcx maps,
00846       //
00847 
00848       uiq->enqueueJob(rutz::make_shared
00849                       (new Nv2UiJob
00850                        (ofs.get(),
00851                         esv.get(),
00852                         eit.get(),
00853                         uidata,
00854                         esm.get(),
00855                         esec->getSeg().get(),
00856                         neoBrain.get(),
00857                         rgbin, vcxmap,
00858                         cus->getIMap().exportToImage(),
00859                         cus->getCMap().exportToImage(),
00860                         cus->getOMap().exportToImage(),
00861                         cus->getFMap().exportToImage(),
00862                         cus->getMMap().exportToImage() /*evc->getMmap()*/
00863                         )));
00864     }
00865 
00866   // destroy the ui queue so that we force it to shut down now
00867   uiq.reset(0);
00868 
00869   manager.stop();
00870 
00871   return retval;
00872 }
00873 
00874 // ######################################################################
00875 int main(int argc, const char** argv)
00876 {
00877   try {
00878     return submain(argc, argv);
00879   } catch (...) {
00880     REPORT_CURRENT_EXCEPTION;
00881   }
00882 }
00883 
00884 // ######################################################################
00885 /* So things look consistent in everyone's emacs... */
00886 /* Local Variables: */
00887 /* mode: c++ */
00888 /* indent-tabs-mode: nil */
00889 /* End: */