/*!@file INVT/neovision2-cuda.C CUDA-accelerated Neovision2 integrated demo */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/neovision2-cuda.C $
// $Id: neovision2-cuda.C 13232 2010-04-15 02:15:06Z dparks $
//

#include "Image/OpenCVUtil.H" // must be first to avoid conflicting defs of int64, uint64

#include "Component/GlobalOpts.H"
#include "Component/ModelManager.H"
#include "Component/ModelOptionDef.H"
#include "Component/ModelParam.H"
#include "Component/ModelParamBatch.H"
#include "Devices/DeviceOpts.H"
#include "Devices/IEEE1394grabber.H"
#include "GUI/ImageDisplayStream.H"
#include "GUI/PrefsWindow.H"
#include "GUI/XWinManaged.H"
#include "Image/ColorOps.H"
#include "Image/CutPaste.H"
#include "Image/DrawOps.H"
#include "Image/FilterOps.H"
#include "Image/Image.H"
#include "Image/ImageSet.H"
#include "Image/Layout.H"
#include "Image/MathOps.H"
#include "Image/Pixels.H"
#include "Image/PyramidOps.H"
#include "Image/ShapeOps.H"
#include "Image/Transforms.H"
#include "Media/FrameSeries.H"
#include "Media/MediaOpts.H"
#include "NeovisionII/Nv2LabelReader.H"
#include "NeovisionII/nv2_common.h"
#include "Neuro/NeoBrain.H"
#include "Neuro/EnvInferoTemporal.H"
#include "Neuro/EnvSaliencyMap.H"
#include "Neuro/EnvSegmenterConfigurator.H"
#include "Neuro/EnvVisualCortex.H"
#include "Raster/GenericFrame.H"
#include "Raster/Raster.H"
#include "Transport/FrameInfo.H"
#include "Transport/TransportOpts.H"
#include "Util/FpsTimer.H"
#include "Util/Pause.H"
#include "Util/StringConversions.H"
#include "Util/StringUtil.H"
#include "Util/SyncJobServer.H"
#include "Util/SysInfo.H"
#include "Util/TextLog.H"
#include "Util/WorkThreadServer.H"
#include "Util/csignals.H"
#include "rutz/shared_ptr.h"
#include "rutz/trace.h"

// conflict, both cudadefs.h and opencv define MIN and MAX
#undef MIN
#undef MAX

#include "CUDA/CudaSaliency.H"

#include <ctype.h>
#include <deque>
#include <iterator>
#include <limits>
#include <stdlib.h> // for atoi(), malloc(), free()
#include <string.h>
#include <sys/resource.h>
#include <signal.h>
#include <time.h>
#include <vector>

//! Text length passed to makeMultilineTextBox() and EnvInferoTemporal::getLabeledImages() in this file
const size_t PREFERRED_TEXT_LENGTH = 42;

// ######################################################################
//! Holds the display-related command-line options of the demo and its "quit" flag
/*! The option members are public because the ui job and the preferences
    window in this file read them directly. */
class EnvSimulationViewer : public ModelComponent
{
public:
  //! Constructor; registers all option members below with the given manager
  EnvSimulationViewer(OptionManager& mgr);

  //! Virtual destructor
  virtual ~EnvSimulationViewer();

  //! Called when one of our params changes; see the definition for the rules it enforces
  virtual void paramChanged(ModelParamBase* const param, const bool valueChanged, ParamClient::ChangeStatus* status);

  //! True once itsDoQuit has been set (done by Nv2UiJob::run() on the final output frame)
  bool shouldQuit() const { return itsDoQuit; }

  OModelParam<Dims> itsInputDims;           //!< input frame dims (OPT_InputFrameDims)
  OModelParam<size_t> optDispZoom;          //!< --disp-zoom: octaves to zoom in on the small maps
  OModelParam<size_t> optInputReduce;       //!< --input-reduce: octaves to shrink the input for display
  OModelParam<std::string> optMainwinTitle; //!< --mainwin-title: title of the main output window
  OModelParam<bool> itsSaveVcx;             //!< --save-vcx: whether to save the VisualCortex output
  OModelParam<bool> itsSaveSm;              //!< --save-sm: whether to save the SaliencyMap output

  bool itsDoQuit;                           //!< set to true to request that the main loop exit
};


// ######################################################################
static const ModelOptionDef OPT_DispZoom =
  { MODOPT_ARG(size_t), "EsvDispZoom", &MOC_OUTPUT, OPTEXP_CORE,
    "Number of octaves to zoom in on the small maps",
    "disp-zoom", '\0', "size_t", "4" };

static const ModelOptionDef OPT_InputReduce = 00136 { MODOPT_ARG(size_t), "EsvInputReduce", &MOC_OUTPUT, OPTEXP_CORE, 00137 "Number of octaves to reduce the input by, for display purposes only", 00138 "input-reduce", '\0', "size_t", "0" }; 00139 00140 static const ModelOptionDef OPT_MainwinTitle = 00141 { MODOPT_ARG_STRING, "MainwinTitle", &MOC_OUTPUT, OPTEXP_CORE, 00142 "Title to use for main output window", 00143 "mainwin-title", '\0', "<string>", "neovision2" }; 00144 00145 static const ModelOptionDef OPT_SaveVcx = 00146 { MODOPT_FLAG, "SaveVcx", &MOC_OUTPUT, OPTEXP_CORE, 00147 "Whether to save the VisualCortex (VCX) output", 00148 "save-vcx", '\0', "", "false" }; 00149 00150 static const ModelOptionDef OPT_SaveSm = 00151 { MODOPT_FLAG, "SaveSm", &MOC_OUTPUT, OPTEXP_CORE, 00152 "Whether to save the SaliencyMap (Sm) output", 00153 "save-sm", '\0', "", "false" }; 00154 00155 // ###################################################################### 00156 EnvSimulationViewer::EnvSimulationViewer(OptionManager& mgr) : 00157 ModelComponent(mgr, "Embeddable Simulation Viewer", "EnvSimulationViewer"), 00158 itsInputDims(&OPT_InputFrameDims, this), 00159 optDispZoom(&OPT_DispZoom, this, ALLOW_ONLINE_CHANGES), 00160 optInputReduce(&OPT_InputReduce, this, ALLOW_ONLINE_CHANGES), 00161 optMainwinTitle(&OPT_MainwinTitle, this), 00162 itsSaveVcx(&OPT_SaveVcx, this), 00163 itsSaveSm(&OPT_SaveSm, this), 00164 itsDoQuit(false) 00165 { } 00166 00167 EnvSimulationViewer::~EnvSimulationViewer() 00168 { } 00169 00170 void EnvSimulationViewer::paramChanged(ModelParamBase* const param, const bool valueChanged, 00171 ParamClient::ChangeStatus* status) 00172 { 00173 if (param == &itsInputDims) { 00174 const size_t excess_size = size_t(0.5 * log2(itsInputDims.getVal().sz() / 1000000.0)); 00175 if (excess_size > optInputReduce.getVal()) optInputReduce.setVal(excess_size); 00176 } else if (param == &optInputReduce) { 00177 const size_t val = optInputReduce.getVal(); 00178 const Dims d = 
itsInputDims.getVal(); 00179 00180 if (val > 16) *status = ParamClient::CHANGE_REJECTED; 00181 else if (d.isNonEmpty() && val > 0 && ((d.w() / (1 << val)) < 32 || (d.h() / (1 << val)) < 32)) 00182 *status = ParamClient::CHANGE_REJECTED; 00183 } 00184 } 00185 00186 // ###################################################################### 00187 struct Nv2UiData 00188 { 00189 Nv2UiData(const int map_zoom_) : 00190 accepted_training_label(), 00191 remote_command(), 00192 map_zoom(map_zoom_), 00193 targetLoc(-1,-1), 00194 ncpu(numCpus()), 00195 text_log_file("") 00196 { } 00197 00198 FpsTimer::State time_state; 00199 std::string accepted_training_label; 00200 std::string remote_command; 00201 const int map_zoom; 00202 Point2D<int> targetLoc; 00203 const int ncpu; 00204 std::string text_log_file; 00205 }; 00206 00207 // ###################################################################### 00208 class Nv2UiJob : public JobServer::Job 00209 { 00210 public: 00211 Nv2UiJob(OutputFrameSeries* ofs_, EnvSimulationViewer* esv_, EnvInferoTemporal* eit_, 00212 const Nv2UiData& uidata_, EnvSaliencyMap* sm_, EnvSegmenter* ese_, 00213 NeoBrain* nb_, Image<PixRGB<byte> > rgbin_, Image<byte> vcxmap_, 00214 Image<byte> Imap_, Image<byte> Cmap_, Image<byte> Omap_, Image<byte> Fmap_, Image<byte> Mmap_) : 00215 ofs(ofs_), esv(esv_), eit(eit_), uidata(uidata_), sm(sm_), ese(ese_), neoBrain(nb_), rgbin(rgbin_), 00216 vcxmap(vcxmap_), Imap(Imap_), Cmap(Cmap_), Omap(Omap_), Fmap(Fmap_), Mmap(Mmap_), 00217 m_dispzoom(1 << esv->optDispZoom.getVal()), m_inputreduce(esv->optInputReduce.getVal()) 00218 { } 00219 00220 // #################### 00221 unsigned int getHalfZoom() const 00222 { 00223 const int div = 4; 00224 return std::max(size_t(1), m_dispzoom/div); 00225 } 00226 00227 // #################### 00228 Layout<PixRGB<byte> > makeInputMarkup(const Rectangle& foa, const Image<byte>& foamask, 00229 const EnvSaliencyMap::State& smstate, const uint32_t patch_id) const 00230 { 00231 
Image<PixRGB<byte> > markup = rgbin; 00232 00233 if (foa.isValid()) drawRectSquareCorners(markup, foa, PixRGB<byte>(255, 255, 0), 3 << m_inputreduce); 00234 00235 if (uidata.targetLoc.isValid()) 00236 drawCircle(markup, uidata.targetLoc, 3, PixRGB<byte>(60, 220, 255), 3 << m_inputreduce); 00237 00238 // draw the first most salient loc: 00239 drawRectSquareCorners(markup, 00240 Rectangle(smstate.fullres_maxpos - uidata.map_zoom/2, Dims(uidata.map_zoom, uidata.map_zoom)), 00241 PixRGB<byte>(255, 0, 0), 3 << m_inputreduce); 00242 00243 // draw the next n most salient loc: 00244 for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) 00245 { 00246 const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i]; 00247 drawRectSquareCorners(markup, Rectangle(locInfo.fullres_maxpos - uidata.map_zoom/2, 00248 Dims(uidata.map_zoom, uidata.map_zoom)), 00249 PixRGB<byte>(150, 0, 0), 3 << m_inputreduce); 00250 } 00251 00252 for (size_t i = 0; i < m_inputreduce; ++i) markup = decXY(markup); 00253 00254 if (foamask.initialized()) drawContour2D(rescaleNI(foamask, markup.getDims()), markup, PixRGB<byte>(0,255,0), 2); 00255 00256 const std::string lines[2] = { 00257 sformat("peak %3d in %3dx%3d foa @ (%3d,%3d)", int(smstate.maxval), 00258 foa.isValid() ? foa.width() : -1, foa.isValid() ? 
foa.height() : -1, 00259 smstate.fullres_maxpos.i, smstate.fullres_maxpos.j), 00260 sformat("%s #%06u [%5.2ffps, %5.1f%%CPU]", convertToString(uidata.time_state.elapsed_time).c_str(), 00261 (unsigned int) patch_id, uidata.time_state.recent_fps, uidata.time_state.recent_cpu_usage*100.0) 00262 }; 00263 00264 const Image<PixRGB<byte> > textarea = 00265 makeMultilineTextBox(markup.getWidth(), &lines[0], 2, 00266 PixRGB<byte>(255, 255, 0), PixRGB<byte>(0,0,0), PREFERRED_TEXT_LENGTH); 00267 00268 return vcat(markup, textarea); 00269 } 00270 00271 // #################### 00272 Layout<PixRGB<byte> > 00273 makeSalmapMarkup(const EnvSaliencyMap::State& smstate) const 00274 { 00275 Image<PixRGB<byte> > zoomedsm = zoomXY(smstate.salmap, m_dispzoom, m_dispzoom); 00276 00277 // draw the first most salient loc: 00278 drawRectSquareCorners(zoomedsm, Rectangle(smstate.lowres_maxpos * m_dispzoom, Dims(m_dispzoom, m_dispzoom)), 00279 PixRGB<byte>(255, 0, 0), 3); 00280 00281 // draw the next n most salient locs: 00282 for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) 00283 { 00284 const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i]; 00285 drawRectSquareCorners(zoomedsm, Rectangle(locInfo.lowres_maxpos * m_dispzoom, Dims(m_dispzoom, m_dispzoom)), 00286 PixRGB<byte>(150, 0, 0), 3); 00287 } 00288 00289 const std::string valstring = sformat("%d", int(smstate.maxval)); 00290 00291 const SimpleFont font = SimpleFont::fixedMaxWidth(zoomedsm.getWidth() / 30); 00292 00293 Point2D<int> textpos = smstate.lowres_maxpos * m_dispzoom; 00294 textpos.j -= font.h() + 2; if (textpos.j < 0) textpos.j += m_dispzoom + 2; 00295 00296 writeText(zoomedsm, textpos, valstring.c_str(), 00297 PixRGB<byte>(255, 0, 0), PixRGB<byte>(0, 0, 0), font, true); 00298 00299 Image<PixRGB<byte> > histo = 00300 neoBrain->getSaliencyHisto(Dims(zoomedsm.getWidth(), 62), PixRGB<byte>(0,0,0), PixRGB<byte>(180,180,180)); 00301 return vcat(zoomedsm, histo); 00302 } 00303 00304 // #################### 00305 
Layout<PixRGB<byte> > makeCmapsMarkup() const 00306 { 00307 unsigned int halfzoom = this->getHalfZoom() / 2; 00308 00309 Image<PixRGB<byte> > cmaps[] = { 00310 zoomXY(Imap, halfzoom, halfzoom), 00311 zoomXY(Cmap, halfzoom, halfzoom), 00312 zoomXY(Omap, halfzoom, halfzoom), 00313 zoomXY(Fmap, halfzoom, halfzoom), 00314 zoomXY(Mmap, halfzoom, halfzoom), 00315 zoomXY(vcxmap, halfzoom, halfzoom) 00316 }; 00317 00318 const char* labels[] = { "I", "C", "O", "F", "M", "VC" }; 00319 00320 for (size_t i = 0; i < sizeof(labels) / sizeof(labels[0]); ++i) { 00321 const SimpleFont font = SimpleFont::fixedMaxWidth(cmaps[i].getWidth() / 20); 00322 writeText(cmaps[i], Point2D<int>(1,1), labels[i], PixRGB<byte>(0), PixRGB<byte>(255), font); 00323 drawLine(cmaps[i], Point2D<int>(0,0), Point2D<int>(cmaps[i].getWidth()-1,0), PixRGB<byte>(255), 1); 00324 drawLine(cmaps[i], Point2D<int>(0,0), Point2D<int>(0,cmaps[i].getHeight()-1), PixRGB<byte>(255), 1); 00325 } 00326 00327 const size_t nrows = 2; 00328 00329 return arrcat(&cmaps[0], sizeof(cmaps) / sizeof(cmaps[0]), (sizeof(cmaps) / sizeof(cmaps[0]) + (nrows-1)) / nrows); 00330 } 00331 00332 // #################### 00333 Image<PixRGB<byte> > makeInhibitionMarkup() const 00334 { 00335 Image<byte> inh = sm->getInhibmap(); 00336 if (!inh.initialized()) inh = Image<byte>(vcxmap.getDims(), ZEROS); 00337 00338 Image<byte> inr = Image<byte>(sm->getInertiaMap()); 00339 if (!inr.initialized()) inr = Image<byte>(vcxmap.getDims(), ZEROS); 00340 00341 Image<PixRGB<byte> > rgb(vcxmap.getDims(), NO_INIT); 00342 Image<PixRGB<byte> >::iterator aptr = rgb.beginw(); 00343 Image<PixRGB<byte> >::iterator stop = rgb.endw(); 00344 00345 Image<byte>::const_iterator rptr = inh.begin(); 00346 Image<byte>::const_iterator gptr = inr.begin(); 00347 00348 while (aptr != stop) *aptr++ = PixRGB<byte>(*rptr++, *gptr++, 0); 00349 00350 return zoomXY(rgb, getHalfZoom() / 2); 00351 } 00352 00353 // #################### 00354 Image<PixRGB<byte> > makeMeters(const size_t 
nx, const Dims& meterdims) const 00355 { 00356 const double maxcpu = uidata.ncpu <= 0 ? 100.0 : uidata.ncpu * 100.0; 00357 00358 const double nothresh = std::numeric_limits<double>::max(); 00359 00360 const MeterInfo infos[] = { 00361 { "dvcx/dt", sm->getVcxFlicker(), 1.0, nothresh, PixRGB<byte>(0, 255, 0) }, 00362 { "dfactor", sm->getDynamicFactor(), 1.0, nothresh, PixRGB<byte>(128, 0, 255) }, 00363 { "boringness", neoBrain->getBoringness(), 128.0, nothresh, PixRGB<byte>(192, 255, 0) }, 00364 { "excitement", neoBrain->getExcitementLevel(), 256.0, nothresh, PixRGB<byte>(255, 0, 32) }, 00365 { "sleepiness", neoBrain->getSleepLevel(), 1000.0, nothresh, PixRGB<byte>(255, 0, 32) }, 00366 { "confidence", eit->getMaxConfidence(), 1.0, eit->getConfidenceThresh(), PixRGB<byte>(0, 255, 128) }, 00367 { "cpu%", uidata.time_state.recent_cpu_usage*100.0, maxcpu, nothresh, PixRGB<byte>(255, 165, 0) }, 00368 { "fps", uidata.time_state.recent_fps, 60.0, nothresh, PixRGB<byte>(0, 128, 255) } 00369 }; 00370 00371 return drawMeters(&infos[0], sizeof(infos) / sizeof(infos[0]), nx, meterdims); 00372 } 00373 00374 // #################### 00375 virtual void run() 00376 { 00377 Point2D<int> scaled_maxpos(-1,-1); 00378 00379 const nub::soft_ref<ImageDisplayStream> ids = ofs->findFrameDestType<ImageDisplayStream>(); 00380 00381 const rutz::shared_ptr<XWinManaged> uiwin = ids.is_valid() ? 
00382 ids->getWindow(esv->optMainwinTitle.getVal()) : rutz::shared_ptr<XWinManaged>(); 00383 00384 Point2D<int> forceTrackLocation(-1,-1); 00385 00386 if (uiwin.is_valid()) { 00387 XButtonEvent ev; 00388 if (uiwin->getLastButtonEvent(&ev) && ev.button == 1) forceTrackLocation = Point2D<int>(ev.x, ev.y); 00389 } 00390 00391 if (forceTrackLocation.isValid()) { 00392 const Point2D<int> candidate = forceTrackLocation * (1 << m_inputreduce) + (1 << m_inputreduce) / 2; 00393 00394 if (rgbin.coordsOk(candidate)) { 00395 scaled_maxpos = candidate; 00396 neoBrain->setTarget(scaled_maxpos, rgbin, -1); 00397 neoBrain->setKeepTracking(true); 00398 } 00399 } else if (uidata.targetLoc.isValid()) { 00400 scaled_maxpos = uidata.targetLoc; 00401 ASSERT(rgbin.coordsOk(scaled_maxpos)); 00402 } 00403 00404 const EnvSaliencyMap::State smstate = sm->getSalmap(vcxmap, scaled_maxpos); 00405 00406 neoBrain->updateBoringness(smstate.salmap, smstate.maxval); 00407 neoBrain->updateExcitement(sm->getVcxFlicker()); 00408 00409 Image<byte> foamask; 00410 Image<PixRGB<byte> > segmentdisp; 00411 00412 // Send the first most salient locations to be identified 00413 const Rectangle foa = ese->getFoa(rgbin, smstate.fullres_maxpos, &foamask, &segmentdisp); 00414 00415 if (foa.isValid()) { 00416 const Point2D<int> objCenter = Point2D<int>(foa.topLeft().i + foa.width()/2, foa.topLeft().j + foa.height()/2); 00417 neoBrain->setTarget(objCenter, rgbin, smstate.maxval); 00418 } else if (!uidata.targetLoc.isValid()) 00419 neoBrain->setTarget(smstate.fullres_maxpos, rgbin, smstate.maxval); 00420 00421 const uint32_t patch_id = uidata.time_state.frame_number; 00422 LINFO("Sendind attended patch at (%d,%d) to EIT", foa.topLeft().i + foa.width()/2, foa.topLeft().j + foa.height()/2); 00423 eit->sendPatch(patch_id, rgbin, foa, 00424 uidata.time_state.elapsed_time, 00425 uidata.accepted_training_label.length() > 0, 00426 uidata.accepted_training_label, 00427 uidata.remote_command, 00428 smstate.fullres_maxpos); 
00429 00430 // Send the next N most salient locations to be identified: 00431 for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) { 00432 Image<byte> nextFoamask; 00433 Image<PixRGB<byte> > nextSegmentdisp; 00434 00435 const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i]; 00436 const Rectangle nextFoa = ese->getFoa(rgbin, locInfo.fullres_maxpos, &nextFoamask, &nextSegmentdisp); 00437 LINFO("Sendind attended patch at (%d,%d) to EIT", nextFoa.topLeft().i + nextFoa.width()/2, nextFoa.topLeft().j + nextFoa.height()/2); 00438 00439 eit->sendPatch(patch_id + 1000000*i /* use different IDs for different patches*/, rgbin, nextFoa, 00440 uidata.time_state.elapsed_time, 00441 uidata.accepted_training_label.length() > 0, 00442 uidata.accepted_training_label, 00443 uidata.remote_command, 00444 locInfo.fullres_maxpos); 00445 } 00446 00447 // log various bits of info (these calls will do nothing if the log filename is empty): 00448 textLog(uidata.text_log_file, "FOAbox", convertToString(foa)); 00449 00450 const FrameState os = ofs->updateNext(); 00451 00452 // save maps if requested: 00453 if (esv->itsSaveVcx.getVal()) ofs->writeGray(vcxmap, "VCO", FrameInfo("VisualCortex output map", SRC_POS)); 00454 00455 if (esv->itsSaveSm.getVal()) ofs->writeGray(smstate.salmap, "SM", FrameInfo("SaliencyMap output map", SRC_POS)); 00456 00457 // ##### compact displays 00458 Layout<PixRGB<byte> > img; 00459 00460 // let's start with an HD display of the input + markups: 00461 Image<PixRGB<byte> > markup = rgbin; 00462 if (foa.isValid()) drawRectSquareCorners(markup, foa, PixRGB<byte>(255, 255, 0), 3 << m_inputreduce); 00463 00464 if (uidata.targetLoc.isValid()) 00465 drawCircle(markup, uidata.targetLoc, 3, PixRGB<byte>(60, 220, 255), 3 << m_inputreduce); 00466 00467 // draw the first most salient loc 00468 drawRectSquareCorners(markup, 00469 Rectangle(smstate.fullres_maxpos - uidata.map_zoom/2, 00470 Dims(uidata.map_zoom, uidata.map_zoom)), 00471 PixRGB<byte>(255, 0, 0), 3 
<< m_inputreduce); 00472 00473 // draw the next n most salient locs: 00474 for (uint i = 1; i < smstate.nMostSalientLoc.size(); ++i) { 00475 const EnvSaliencyMap::LocInfo locInfo = smstate.nMostSalientLoc[i]; 00476 drawRectSquareCorners(markup, 00477 Rectangle(locInfo.fullres_maxpos - uidata.map_zoom/2, 00478 Dims(uidata.map_zoom, uidata.map_zoom)), PixRGB<byte>(150, 0, 0), 3 << m_inputreduce); 00479 } 00480 00481 for (size_t i = 0; i < m_inputreduce; ++i) markup = decXY(markup); 00482 00483 if (foamask.initialized()) drawContour2D(rescaleNI(foamask,markup.getDims()), markup, PixRGB<byte>(0,255,0), 2); 00484 00485 // that's it for this window, let's send it out to display: 00486 ofs->writeRGB(markup, esv->optMainwinTitle.getVal(), FrameInfo("copy of input", SRC_POS)); 00487 00488 // the salmap: 00489 const unsigned int halfzoom = 8; 00490 Image<PixRGB<byte> > zoomedsm = zoomXY(smstate.salmap, halfzoom, halfzoom); 00491 00492 drawRectSquareCorners(zoomedsm, Rectangle(smstate.lowres_maxpos * halfzoom, Dims(halfzoom, halfzoom)), 00493 PixRGB<byte>(255, 0, 0), 3); 00494 00495 const std::string valstring = sformat("%d", int(smstate.maxval)); 00496 const SimpleFont font = SimpleFont::fixedMaxWidth(zoomedsm.getWidth() / 30); 00497 Point2D<int> textpos = smstate.lowres_maxpos * halfzoom; 00498 textpos.j -= font.h() + 2; if (textpos.j < 0) textpos.j += halfzoom + 2; 00499 writeText(zoomedsm, textpos, valstring.c_str(), PixRGB<byte>(255, 0, 0), PixRGB<byte>(0, 0, 0), font, true); 00500 00501 Image<PixRGB<byte> > inh = this->makeInhibitionMarkup(); 00502 drawLine(inh, Point2D<int>(0,0), Point2D<int>(inh.getWidth()-1,0), PixRGB<byte>(255, 255, 255), 1); 00503 drawLine(inh, Point2D<int>(0,0), Point2D<int>(0,inh.getHeight()-1), PixRGB<byte>(255, 255, 255), 1); 00504 00505 if (!segmentdisp.initialized()) 00506 segmentdisp = Image<PixRGB<byte> >(inh.getDims(), ZEROS); 00507 else { 00508 segmentdisp = rescaleNI(segmentdisp, inh.getDims()); 00509 drawContour2D(rescaleNI(foamask, 
inh.getDims()), segmentdisp, PixRGB<byte>(0,255,0), 2); 00510 } 00511 drawLine(segmentdisp, Point2D<int>(0,0), Point2D<int>(inh.getWidth()-1,0), 00512 PixRGB<byte>(255, 255, 255), 1); 00513 drawLine(segmentdisp, Point2D<int>(0,0), Point2D<int>(0,inh.getHeight()-1), 00514 PixRGB<byte>(255, 255, 255), 1); 00515 Layout<PixRGB<byte> > inl = vcat(inh, segmentdisp); 00516 00517 img = vcat(zoomedsm, this->makeMeters(2, Dims(zoomedsm.getDims().w() / 2, 13))); 00518 00519 // now some info: 00520 const std::string lines[1] = 00521 { 00522 sformat("peak %3d in %3dx%3d foa @ (%4d,%4d) %04dx%04d %s #%06u [%3.2ffps, %4.1f%%CPU]", 00523 int(smstate.maxval), 00524 foa.isValid() ? foa.width() : -1, 00525 foa.isValid() ? foa.height() : -1, 00526 smstate.fullres_maxpos.i, 00527 smstate.fullres_maxpos.j, 00528 rgbin.getWidth(), rgbin.getHeight(), 00529 convertToString(uidata.time_state.elapsed_time).c_str(), 00530 (unsigned int) patch_id, 00531 uidata.time_state.recent_fps, 00532 uidata.time_state.recent_cpu_usage*100.0) 00533 }; 00534 00535 const Image<PixRGB<byte> > textarea = 00536 makeMultilineTextBox(img.getWidth(), &lines[0], 1, PixRGB<byte>(255, 255, 0), PixRGB<byte>(0,0,0), 00537 PREFERRED_TEXT_LENGTH, 10); 00538 img = vcat(img, textarea); 00539 00540 // now the cmaps and friends: 00541 const Layout<PixRGB<byte> > cmaps = this->makeCmapsMarkup(); 00542 inl = hcat(cmaps, inl); 00543 img = vcat(img, inl); 00544 00545 ofs->writeRgbLayout(img, "neovision2 maps", FrameInfo("copy of input", SRC_POS)); 00546 00547 std::vector<Nv2LabelReader::LabeledImage> images = eit->getLabeledImages(PREFERRED_TEXT_LENGTH); 00548 00549 for (size_t i = 0; i < images.size(); ++i) 00550 { 00551 ofs->writeRGB(images[i].img, images[i].ident, FrameInfo("object-labeled image", SRC_POS)); 00552 neoBrain->sayObjectLabel(images[i].label, /*confidence = */ 0, true); 00553 } 00554 00555 if (os == FRAME_FINAL) esv->itsDoQuit = true; 00556 } 00557 00558 // #################### 00559 virtual const char* jobType() 
const { return "Nv2UiJob"; } 00560 00561 private: 00562 OutputFrameSeries* const ofs; 00563 EnvSimulationViewer* const esv; 00564 EnvInferoTemporal* const eit; 00565 const Nv2UiData uidata; 00566 EnvSaliencyMap* const sm; 00567 EnvSegmenter* const ese; 00568 NeoBrain* const neoBrain; 00569 Image<PixRGB<byte> > rgbin; 00570 const Image<byte> vcxmap; 00571 const Image<byte> Imap; 00572 const Image<byte> Cmap; 00573 const Image<byte> Omap; 00574 const Image<byte> Fmap; 00575 const Image<byte> Mmap; 00576 const size_t m_dispzoom; 00577 const size_t m_inputreduce; 00578 }; 00579 00580 // ###################################################################### 00581 static const ModelOptionDef OPT_WithObjrecMode = 00582 { MODOPT_FLAG, "WithObjrecMode", &MOC_OUTPUT, OPTEXP_CORE, 00583 "Whether to include an 'objrec' mode which toggles parameters " 00584 "to values suitable for object recognition training.", 00585 "with-objrec-mode", '\0', "", "true" }; 00586 00587 static const ModelOptionDef OPT_ALIASHDDemo = 00588 { MODOPT_ALIAS, "ALIASHDDemo", &MOC_ALIAS, OPTEXP_CORE, 00589 "Set parameters for the hd camera on ilab24", 00590 "hd-demo", '\0', "", 00591 "--in=XC " 00592 "--framegrabber-dims=1920x1080 " 00593 "--patch-reader=192.168.0.229:9930 " 00594 "--disp-zoom=3 " 00595 "--with-objrec-mode " 00596 "--evc-multithreaded " 00597 }; 00598 00599 // ###################################################################### 00600 int submain(int argc, const char** argv) 00601 { 00602 volatile int signum = 0; 00603 signal(SIGPIPE, SIG_IGN); 00604 catchsignals(&signum); 00605 00606 // Instantiate our various ModelComponents: 00607 00608 ModelManager manager("Nv2"); 00609 00610 OModelParam<bool> optWithObjrecMode(&OPT_WithObjrecMode, &manager); 00611 OModelParam<std::string> optTextLogFile(&OPT_TextLogFile, &manager); 00612 00613 nub::ref<EnvSimulationViewer> esv(new EnvSimulationViewer(manager)); 00614 manager.addSubComponent(esv); 00615 00616 nub::ref<InputFrameSeries> ifs(new 
InputFrameSeries(manager)); 00617 manager.addSubComponent(ifs); 00618 00619 nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager)); 00620 manager.addSubComponent(ofs); 00621 00622 nub::ref<CudaSaliency> cus(new CudaSaliency(manager)); 00623 manager.addSubComponent(cus); 00624 00625 nub::ref<EnvSaliencyMap> esm(new EnvSaliencyMap(manager)); 00626 manager.addSubComponent(esm); 00627 00628 nub::ref<EnvSegmenterConfigurator> esec(new EnvSegmenterConfigurator(manager)); 00629 manager.addSubComponent(esec); 00630 00631 nub::ref<EnvInferoTemporal> eit(new EnvInferoTemporal(manager)); 00632 manager.addSubComponent(eit); 00633 00634 nub::ref<NeoBrain> neoBrain(new NeoBrain(manager)); 00635 manager.addSubComponent(neoBrain); 00636 00637 manager.requestOptionAlias(&OPT_ALIASHDDemo); 00638 00639 manager.exportOptions(MC_RECURSE); 00640 00641 #if defined(HAVE_IEEE1394) 00642 // input comes from firewire camera 640x480/rgb/15fps by default 00643 manager.setOptionValString(&OPT_InputFrameSource, "ieee1394"); 00644 manager.setOptionValString(&OPT_FrameGrabberMode, "RGB24"); 00645 manager.setOptionValString(&OPT_FrameGrabberDims, "640x480"); 00646 manager.setOptionValString(&OPT_FrameGrabberFPS, "15"); 00647 #elif defined(HAVE_QUICKTIME_QUICKTIME_H) 00648 manager.setOptionValString(&OPT_InputFrameSource, "qtgrab"); 00649 manager.setOptionValString(&OPT_FrameGrabberDims, "640x480"); 00650 #endif 00651 00652 // output goes to the screen by default 00653 manager.setOptionValString(&OPT_OutputFrameSink, "display"); 00654 00655 // change some default values 00656 manager.setOptionValString(&OPT_EsmInertiaHalfLife, "60"); 00657 manager.setOptionValString(&OPT_EsmIorStrength, "8.0"); 00658 00659 if (manager.parseCommandLine(argc, argv, "<ip1:port1,ip2:port2,...>", 0, 1) == false) return(1); 00660 00661 eit->initReaders(manager.numExtraArgs() > 0 ? 
manager.getExtraArg(0) : ""); 00662 00663 manager.start(); 00664 00665 neoBrain->init(ifs->peekDims()); 00666 00667 Nv2UiData uidata(1 << /* evc->getMapLevel()*/ 4); 00668 uidata.text_log_file = optTextLogFile.getVal(); 00669 00670 PrefsWindow pwin("control panel", SimpleFont::FIXED(8)); 00671 pwin.setValueNumChars(16); 00672 00673 pwin.addPrefsForComponent(esv.get()); 00674 pwin.addPrefsForComponent(esm.get()); 00675 pwin.addPrefsForComponent(esec->getSeg().get()); 00676 pwin.addPrefsForComponent(eit.get()); 00677 pwin.addPrefsForComponent(neoBrain.get(), true); 00678 00679 PrefItemBln prefPause(&pwin, "pause", false); 00680 PrefItemStr prefRemoteCommand(&pwin, "remote command", uidata.remote_command); 00681 PrefItemBln prefInTrainingMode(&pwin, "in training mode", false); 00682 PrefItemBln prefInObjRecMode(optWithObjrecMode.getVal() ? &pwin : 0, "in ObjRec mode", false); 00683 PrefItemBln prefDoGrabFrame(&pwin, "grab frame", true); 00684 PrefItemBln prefCommitTrainingImage(&pwin, "commit training image", false); 00685 PrefItemBln prefCommitTrainingImageConfirm(&pwin, "confirm commit ??", false); 00686 PrefItemStr prefTrainingLabel(&pwin, "training label", ""); 00687 PrefItemByt prefFontSize(&pwin, "font size", 6); 00688 00689 PrefsWindow inputprefs; 00690 inputprefs.addPrefsForComponent(ifs->getFrameSource().get()); 00691 pwin.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get())); 00692 inputprefs.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get())); 00693 00694 PauseWaiter p; 00695 00696 int retval = 0; 00697 00698 rutz::shared_ptr<JobServer> uiq; 00699 // set up a background job server with one worker thread to 00700 // handle the ui jobs: 00701 rutz::shared_ptr<WorkThreadServer> tsrv(new WorkThreadServer("neovision2-ui", 1)); 00702 00703 // keep max latency low, and if we get bogged down, then drop 00704 // old frames rather than new ones 00705 tsrv->setMaxQueueSize(2); 00706 tsrv->setDropPolicy(WorkThreadServer::DROP_OLDEST); 00707 
tsrv->setFlushBeforeStopping(false); 00708 uiq = tsrv; 00709 00710 ASSERT(uiq.get() != 0); 00711 00712 ifs->startStream(); 00713 00714 const GenericFrameSpec fspec = ifs->peekFrameSpec(); 00715 00716 FpsTimer fps_timer; 00717 00718 bool previous_training_mode = prefInTrainingMode.get(); 00719 bool previous_do_fixed = esm->getUseFixed(); 00720 Image<PixRGB<byte> > rgbin_last; 00721 00722 ModelParamBatch objrecParams; 00723 objrecParams.addParamValue("EseDynamicFoa", false); 00724 objrecParams.addParamValue("EseFoaSize", 80); 00725 objrecParams.addParamValue("NeobrainBoringnessThresh", 2000); 00726 objrecParams.addParamValue("NeobrainTargetFramesThresh", (unsigned long) 2000); 00727 objrecParams.addParamValue("NeobrainNoMoveFramesThresh", (unsigned long) 2000); 00728 00729 bool previous_objrec_mode = prefInObjRecMode.get(); 00730 00731 while (true) 00732 { 00733 if (signum != 0) { 00734 LINFO("quitting because %s was caught", signame(signum)); 00735 retval = -1; 00736 break; 00737 } 00738 00739 if (ofs->becameVoid()) { 00740 LINFO("quitting because output stream was closed or became void"); 00741 break; 00742 } 00743 00744 if (esv->shouldQuit()) break; 00745 00746 // 00747 // update preferences window and uidata 00748 // 00749 00750 pwin.update(); // handle pending preference window events 00751 00752 setPause(prefPause.get()); 00753 uidata.remote_command = prefRemoteCommand.get(); 00754 00755 prefCommitTrainingImage.setDisabled(!prefInTrainingMode.get()); 00756 prefCommitTrainingImageConfirm.setDisabled(!prefInTrainingMode.get()); 00757 prefTrainingLabel.setDisabled(!prefInTrainingMode.get()); 00758 00759 pwin.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get())); 00760 00761 inputprefs.setFont(SimpleFont::fixedMaxWidth(prefFontSize.get())); 00762 inputprefs.update(); 00763 00764 if (prefInObjRecMode.get()) { 00765 if (!previous_objrec_mode) objrecParams.installValues(&manager); // save previous values 00766 } else { 00767 if (previous_objrec_mode) 
objrecParams.restoreValues(&manager); //restore values 00768 } 00769 00770 previous_objrec_mode = prefInObjRecMode.get(); 00771 00772 // This code enforces the "training mode" logic 00773 // .. i.e., certain combinations of preferences are not possible. 00774 uidata.accepted_training_label = ""; 00775 00776 if (prefInTrainingMode.get()) { 00777 if (!previous_training_mode) previous_do_fixed = esm->getUseFixed(); 00778 00779 esm->setUseFixed(true); 00780 00781 if (prefCommitTrainingImageConfirm.get()) { 00782 if (!prefCommitTrainingImage.get()) 00783 prefCommitTrainingImageConfirm.set(false); 00784 else if (prefTrainingLabel.get().length() <= 3) { 00785 prefCommitTrainingImage.set(false); 00786 prefCommitTrainingImageConfirm.set(false); 00787 prefTrainingLabel.set(""); 00788 00789 LERROR("invalid training label %s (too short)", prefTrainingLabel.get().c_str()); 00790 } else { 00791 // OK, we accept the training label as a valid one 00792 // and send it off to the labelers: 00793 uidata.accepted_training_label = prefTrainingLabel.get(); 00794 } 00795 } 00796 } else { 00797 // training mode is off, certain settings not possible 00798 prefDoGrabFrame.set(true); 00799 prefCommitTrainingImage.set(false); 00800 prefCommitTrainingImageConfirm.set(false); 00801 prefTrainingLabel.set(""); 00802 00803 // this just handles unfixing window when training is first toggled off 00804 if (previous_training_mode) esm->setUseFixed(previous_do_fixed); 00805 } 00806 00807 previous_training_mode = prefInTrainingMode.get(); 00808 00809 if (p.checkPause()) continue; 00810 00811 // 00812 // get the next frame from our input source 00813 // 00814 00815 const FrameState is = ifs->updateNext(); 00816 if (is == FRAME_COMPLETE) break; 00817 00818 GenericFrame input = ifs->readFrame(); 00819 if (!input.initialized()) break; 00820 00821 // only read in from camera if do_grab_frame 00822 const Image<PixRGB<byte> > rgbin = prefDoGrabFrame.get() ? 
input.asRgb() : rgbin_last; 00823 00824 rgbin_last = rgbin; 00825 00826 if (eit->belowConfidenceThresh()) uidata.targetLoc = neoBrain->trackObject(rgbin); 00827 else uidata.targetLoc = Point2D<int>(-1,-1); 00828 00829 // 00830 // send the frame to the EnvVisualCortex and get the vcx output 00831 // 00832 00833 cus->doInput(rgbin); 00834 00835 fps_timer.nextFrame(); 00836 uidata.time_state = fps_timer.getState(); 00837 00838 if (uidata.time_state.frame_number % 50 == 0) 00839 LINFO("frame %u: %.2f fps", uidata.time_state.frame_number, uidata.time_state.recent_fps); 00840 00841 const Image<byte> vcxmap = cus->getOutput() * 5.0F; 00842 00843 // 00844 // build a ui job to run in the background to display update the 00845 // saliency map the input frame, the vcx maps, 00846 // 00847 00848 uiq->enqueueJob(rutz::make_shared 00849 (new Nv2UiJob 00850 (ofs.get(), 00851 esv.get(), 00852 eit.get(), 00853 uidata, 00854 esm.get(), 00855 esec->getSeg().get(), 00856 neoBrain.get(), 00857 rgbin, vcxmap, 00858 cus->getIMap().exportToImage(), 00859 cus->getCMap().exportToImage(), 00860 cus->getOMap().exportToImage(), 00861 cus->getFMap().exportToImage(), 00862 cus->getMMap().exportToImage() /*evc->getMmap()*/ 00863 ))); 00864 } 00865 00866 // destroy the ui queue so that we force it to shut down now 00867 uiq.reset(0); 00868 00869 manager.stop(); 00870 00871 return retval; 00872 } 00873 00874 // ###################################################################### 00875 int main(int argc, const char** argv) 00876 { 00877 try { 00878 return submain(argc, argv); 00879 } catch (...) { 00880 REPORT_CURRENT_EXCEPTION; 00881 } 00882 } 00883 00884 // ###################################################################### 00885 /* So things look consistent in everyone's emacs... */ 00886 /* Local Variables: */ 00887 /* mode: c++ */ 00888 /* indent-tabs-mode: nil */ 00889 /* End: */