00001 /*!@file Vgames/app-roi-extract.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Vgames/app-roi-extract.C $ 00035 // $Id: app-roi-extract.C 9412 2008-03-10 23:10:15Z farhan $ 00036 // 00037 00038 #ifndef VGAMES_APP_ROI_EXTRACT_C_DEFINED 00039 #define VGAMES_APP_ROI_EXTRACT_C_DEFINED 00040 00041 #include "Component/ModelManager.H" 00042 #include "GUI/XWinManaged.H" 00043 #include "Image/ColorOps.H" 00044 #include "Image/CutPaste.H" 00045 #include "Image/DrawOps.H" 00046 #include "Image/Image.H" 00047 #include "Image/Layout.H" 00048 #include "Image/MathOps.H" 00049 #include "Image/Pixels.H" 00050 #include "Image/ShapeOps.H" 00051 #include "Image/SimpleFont.H" 00052 #include "Media/FrameSeries.H" 00053 #include "Raster/GenericFrame.H" 00054 #include "Raster/Raster.H" 00055 #include "Raster/PfmParser.H" 00056 #include "Raster/PfmWriter.H" 00057 #include "Transport/FrameInfo.H" 00058 #include "Util/Pause.H" 00059 #include "Util/StringUtil.H" 00060 #include "Util/csignals.H" 00061 #include "Util/sformat.H" 00062 #include "rutz/shared_ptr.h" 00063 00064 #include <fstream> 00065 #include <iomanip> 00066 #include <iterator> 00067 #include <limits> 00068 #include <map> 00069 #include <sstream> 00070 #include <vector> 00071 00072 #include <unistd.h> 00073 00074 Image<PixRGB<byte> > normC255(const Image<PixRGB<double> >& x) 00075 { 00076 Image<PixRGB<double> > tmp(x); 00077 normalizeC(tmp, 0, 255); 00078 return Image<PixRGB<byte> >(tmp); 00079 } 00080 00081 class PatchSet 00082 { 00083 public: 00084 PatchSet(const std::string& nm, const Dims& d) 00085 : 00086 itsName(nm), 00087 itsDims(d) 00088 { 00089 this->load(); 00090 } 00091 00092 Dims getDims() const { return itsDims; } 00093 00094 void save() const 00095 { 00096 const std::string fname = sformat("%s.patchset", itsName.c_str()); 00097 00098 std::ofstream ofs(fname.c_str()); 00099 if (!ofs.is_open()) 00100 LFATAL("couldn't open %s for writing", fname.c_str()); 00101 00102 ofs << itsName << '\n'; 00103 ofs << convertToString(itsDims) << '\n'; 00104 00105 std::map<std::string, PatchInfo>::const_iterator itr, stop; 00106 00107 size_t ntotal = 0; 00108 00109 for (itr = itsInfo.begin(), stop = itsInfo.end(); itr != stop; ++itr) 00110 { 00111 ofs << (*itr).first << '\n' 00112 << (*itr).second.id() << '\n' 00113 << (*itr).second.n() << '\n'; 00114 00115 ntotal += (*itr).second.n(); 00116 } 00117 00118 ASSERT(itsFeatures.size() == (itsDims.sz() * 3 * ntotal)); 00119 00120 const Image<float> features(&itsFeatures[0], 00121 itsDims.sz() * 3, ntotal); 00122 00123 Image<float> ids(itsInfo.size(), features.getHeight(), ZEROS); 00124 for (size_t i = 0; i < itsLabelIds.size(); ++i) 00125 { 00126 ASSERT(itsLabelIds[i] < ids.getWidth()); 00127 ids.setVal(itsLabelIds[i], i, 1.0f); 00128 } 00129 00130 PfmWriter::writeFloat(features, 00131 sformat("%s.features.pfm", itsName.c_str())); 00132 00133 PfmWriter::writeFloat(ids, 00134 sformat("%s.ids.pfm", itsName.c_str())); 00135 } 00136 00137 void load() 00138 { 00139 itsInfo.clear(); 00140 00141 const std::string fname = sformat("%s.patchset", itsName.c_str()); 00142 00143 std::ifstream ifs(fname.c_str()); 00144 if (!ifs.is_open()) 00145 { 00146 LINFO("couldn't open %s for reading", fname.c_str()); 00147 return; 00148 } 00149 00150 std::string name; 00151 std::getline(ifs, name); 00152 if (name != itsName) 00153 LFATAL("wrong name in file %s (expected %s, got %s)", 00154 fname.c_str(), itsName.c_str(), name.c_str()); 00155 00156 std::string dimsstr; 00157 std::getline(ifs, dimsstr); 00158 if (fromStr<Dims>(dimsstr) != itsDims) 00159 LFATAL("wrong dims in file %s (expected %s, got %s)", 00160 fname.c_str(), convertToString(itsDims).c_str(), dimsstr.c_str()); 00161 00162 size_t ntotal = 0; 00163 00164 while (1) 00165 { 00166 std::string label; 00167 if (!std::getline(ifs, label)) 00168 break; 00169 00170 if (itsInfo.find(label) != itsInfo.end()) 00171 LFATAL("already read PatchInfo for label %s in file %s", 00172 label.c_str(), fname.c_str()); 00173 00174 int id; 00175 if (!(ifs >> id)) 00176 LFATAL("couldn't read id value for PatchInfo %s from file %s", 00177 label.c_str(), fname.c_str()); 00178 ifs >> std::ws; 00179 theirNextLabelId = std::max(theirNextLabelId, id + 1); 00180 LINFO("got patch id = %d, next id = %d", 00181 id, theirNextLabelId); 00182 00183 int n = -1; 00184 if (!(ifs >> n)) 00185 LFATAL("couldn't read N value for PatchInfo %s from file %s", 00186 label.c_str(), fname.c_str()); 00187 ifs >> std::ws; 00188 if (n < 0) 00189 LFATAL("got bogus N value %d for PatchInfo %s from file %s", 00190 n, label.c_str(), fname.c_str()); 00191 00192 ntotal += n; 00193 00194 PatchInfo info(label, id, n); 00195 00196 itsInfo.insert(std::make_pair(label, info)); 00197 00198 LINFO("read PatchInfo %s with n=%"ZU" from file %s", 00199 label.c_str(), info.n(), fname.c_str()); 00200 } 00201 00202 const Image<float> features = 00203 PfmParser(sformat("%s.features.pfm", itsName.c_str())).getFrame().asFloat(); 00204 00205 ASSERT(features.getHeight() == int(ntotal)); 00206 ASSERT(features.getWidth() == itsDims.sz() * 3); 00207 00208 const Image<float> ids = 00209 PfmParser(sformat("%s.ids.pfm", itsName.c_str())).getFrame().asFloat(); 00210 00211 ASSERT(ids.getHeight() == int(ntotal)); 00212 LINFO("ids.getWidth() = %d", ids.getWidth()); 00213 LINFO("theirNextLabelId = %d", theirNextLabelId); 00214 ASSERT(ids.getWidth() == theirNextLabelId); 00215 00216 itsFeatures.resize(0); 00217 itsFeatures.insert(itsFeatures.end(), features.begin(), features.end()); 00218 00219 itsLabelIds.resize(0); 00220 for (int y = 0; y < ids.getHeight(); ++y) 00221 { 00222 int pos = -1; 00223 for (int x = 0; x < ids.getWidth(); ++x) 00224 { 00225 const float val = ids.getVal(x,y); 00226 if (val == 1.0f) 00227 { 00228 if (pos == -1) 00229 pos = x; 00230 else 00231 LFATAL("oops! more than one label id (columns %d and %d) " 00232 "in row %d of file %s.ids.pfm", 00233 pos, x, y, itsName.c_str()); 00234 } 00235 else if (val != 0.0f) 00236 { 00237 LFATAL("oops! invalid value %.17f in column %d, row %d " 00238 "of file %s.ids.pfm", val, x, y, itsName.c_str()); 00239 } 00240 } 00241 if (pos == -1) 00242 LFATAL("oops! no label id in row %d of file %s.ids.pfm", 00243 y, itsName.c_str()); 00244 00245 ASSERT(pos >= 0 && pos < theirNextLabelId); 00246 00247 itsLabelIds.push_back(pos); 00248 } 00249 } 00250 00251 void addLabeledPatch(const std::string& label, 00252 const Image<PixRGB<byte> >& patch) 00253 { 00254 ASSERT(patch.getDims() == itsDims); 00255 00256 if (itsInfo.find(label) == itsInfo.end()) 00257 itsInfo.insert(std::make_pair(label, PatchInfo(label, theirNextLabelId++))); 00258 00259 PatchInfo& info = (*itsInfo.find(label)).second; 00260 00261 info.addPatch(patch); 00262 00263 for (int i = 0; i < patch.getSize(); ++i) 00264 for (int j = 0; j < 3; ++j) 00265 itsFeatures.push_back(float(patch.getVal(i).p[j])); 00266 00267 itsLabelIds.push_back(info.id()); 00268 } 00269 00270 private: 00271 const std::string itsName; 00272 const Dims itsDims; 00273 00274 struct PatchInfo 00275 { 00276 PatchInfo(const std::string& l, int id, size_t n = 0) 00277 : itsLabel(l), itsLabelId(id), itsN(n) {} 00278 00279 int id() const { return itsLabelId; } 00280 00281 size_t n() const { return itsN; } 00282 00283 void addPatch(const Image<PixRGB<byte> >& patch) 00284 { 00285 itsN++; 00286 } 00287 00288 private: 00289 const std::string itsLabel; 00290 const int itsLabelId; 00291 size_t itsN; 00292 }; 00293 00294 static int theirNextLabelId; 00295 00296 std::map<std::string, PatchInfo> itsInfo; 00297 std::vector<float> itsFeatures; 00298 std::vector<int> itsLabelIds; 00299 }; 00300 00301 int PatchSet::theirNextLabelId = 0; 00302 00303 class RoiExtractor 00304 { 00305 public: 00306 RoiExtractor(const std::string& nm, 00307 const rutz::shared_ptr<PatchSet>& ps, 00308 const Point2D<int>& pt) 00309 : 00310 itsName(nm), 00311 itsPatchSet(ps), 00312 itsRegion(Rectangle(pt, ps->getDims())) 00313 {} 00314 00315 void label(Image<PixRGB<byte> >& img, const PixRGB<byte>& col) 00316 { 00317 drawRectSquareCorners(img, itsRegion, col, 1); 00318 00319 writeText(img, itsRegion.bottomLeft(), itsName.c_str(), 00320 col, PixRGB<byte>(0,0,0), 00321 SimpleFont::FIXED(6), true); 00322 } 00323 00324 const Rectangle& rect() const { return itsRegion; } 00325 00326 PatchSet& patchSet() { return *itsPatchSet; } 00327 00328 private: 00329 const std::string itsName; 00330 const rutz::shared_ptr<PatchSet> itsPatchSet; 00331 const Rectangle itsRegion; 00332 }; 00333 00334 int main(const int argc, const char **argv) 00335 { 00336 volatile int signum = 0; 00337 catchsignals(&signum); 00338 00339 ModelManager manager("Streamer"); 00340 00341 nub::soft_ref<InputFrameSeries> ifs(new InputFrameSeries(manager)); 00342 manager.addSubComponent(ifs); 00343 00344 if (manager.parseCommandLine(argc, argv, "configfile", 1, 2) == false) 00345 return(1); 00346 00347 std::map<std::string, rutz::shared_ptr<PatchSet> > patches; 00348 std::vector<rutz::shared_ptr<RoiExtractor> > regions; 00349 00350 { 00351 std::ifstream ifs(manager.getExtraArg(0).c_str()); 00352 if (!ifs.is_open()) 00353 LFATAL("couldn't open %s for reading", manager.getExtraArg(0).c_str()); 00354 00355 std::string line; 00356 while (std::getline(ifs, line)) 00357 { 00358 if (line.length() > 0 && line[0] == '#') 00359 continue; 00360 00361 std::vector<std::string> parts; 00362 split(line, ":", std::back_inserter(parts)); 00363 00364 if (parts.size() == 0) 00365 LFATAL("invalid empty argument"); 00366 00367 if (parts[0] == "patchset") 00368 { 00369 if (parts.size() != 3) 00370 LFATAL("expected patchset:name:dims but got %s", 00371 line.c_str()); 00372 00373 const std::string nm = parts[1]; 00374 const Dims d = fromStr<Dims>(parts[2]); 00375 patches[nm] = rutz::shared_ptr<PatchSet>(new PatchSet(nm, d)); 00376 } 00377 else if (parts[0] == "roi") 00378 { 00379 if (parts.size() != 4) 00380 LFATAL("expected roi:name:patchsetname:point but got %s", 00381 line.c_str()); 00382 00383 rutz::shared_ptr<PatchSet> p = patches[parts[2]]; 00384 if (p.get() == 0) 00385 LFATAL("invalid patchset name %s", parts[1].c_str()); 00386 00387 const Point2D<int> pt = fromStr<Point2D<int> >(parts[3]); 00388 00389 regions.push_back(rutz::shared_ptr<RoiExtractor> 00390 (new RoiExtractor(parts[1], p, pt))); 00391 } 00392 } 00393 } 00394 00395 std::string outprefix = "regions"; 00396 00397 manager.start(); 00398 00399 ifs->startStream(); 00400 00401 PauseWaiter p; 00402 00403 // XWinManaged mainwin(ifs->peekDims(), -1, -1, "main"); 00404 XWinManaged zoomwin(Dims(16,16), -1, -1, "zoom"); 00405 00406 std::ifstream autoresp; 00407 if (manager.numExtraArgs() >= 2) 00408 { 00409 autoresp.open(manager.getExtraArg(1).c_str()); 00410 if (!autoresp.is_open()) 00411 LFATAL("couldn't open %s for reading", 00412 manager.getExtraArg(1).c_str()); 00413 } 00414 00415 int n = 0; 00416 00417 while (true) 00418 { 00419 if (signum != 0) 00420 { 00421 LINFO("quitting because %s was caught", signame(signum)); 00422 return -1; 00423 } 00424 00425 if (p.checkPause()) 00426 continue; 00427 00428 const FrameState is = ifs->updateNext(); 00429 if (is == FRAME_COMPLETE) 00430 break; 00431 00432 const Image<PixRGB<byte> > input = ifs->readRGB(); 00433 if (!input.initialized()) 00434 break; 00435 00436 Image<PixRGB<byte> > labeledinput(input); 00437 for (size_t i = 0; i < regions.size(); ++i) 00438 regions[i]->label(labeledinput, PixRGB<byte>(255, 0, 0)); 00439 00440 // mainwin.setDims(input.getDims()); 00441 00442 bool doquit = false; 00443 00444 for (size_t i = 0; i < regions.size(); ++i) 00445 { 00446 Image<PixRGB<byte> > inputcopy(labeledinput); 00447 regions[i]->label(inputcopy, PixRGB<byte>(128, 255, 0)); 00448 // mainwin.drawImage(inputcopy); 00449 00450 const Image<PixRGB<byte> > patch = crop(input, regions[i]->rect()); 00451 Image<PixRGB<byte> > zoomed = zoomXY(patch, 8); 00452 zoomwin.setDims(zoomed.getDims()); 00453 00454 std::string resp; 00455 00456 if (autoresp.is_open()) 00457 { 00458 std::string line; 00459 if (!std::getline(autoresp, line)) 00460 { 00461 LERROR("couldn't read line %d of autoresponse file", n); 00462 break; 00463 } 00464 00465 std::istringstream iss(line); 00466 int nn; 00467 if (!(iss >> nn >> resp)) 00468 LFATAL("couldn't parse number and response from " 00469 "line %d of autoresponse file", n); 00470 00471 if (n != nn) 00472 LFATAL("wrong frame number in autoresponse file " 00473 "(got %d, expected %d)", nn, n); 00474 00475 writeText(zoomed, Point2D<int>(0,0), line.c_str(), 00476 PixRGB<byte>(0,0,255), PixRGB<byte>(0,0,0), 00477 SimpleFont::FIXED(10), true); 00478 zoomwin.drawImage(zoomed); 00479 } 00480 else 00481 { 00482 zoomwin.drawImage(zoomed); 00483 00484 while ((resp = zoomwin.getLastKeyString()).length() == 0) 00485 { 00486 usleep(10000); 00487 } 00488 00489 if (isalnum(resp[0])) 00490 resp = resp[0]; 00491 else if (resp[0] == ' ') // space 00492 resp = "none"; 00493 else if (resp[0] == '?') 00494 resp = "unknown"; 00495 else if (resp[0] == 27) // escape 00496 { 00497 LINFO("ESCAPE!"); 00498 doquit = true; 00499 break; 00500 } 00501 else // invalid response 00502 { 00503 resp = ""; 00504 } 00505 } 00506 00507 if (resp.length() > 0 && resp != "unknown") 00508 regions[i]->patchSet().addLabeledPatch(resp, patch); 00509 } 00510 00511 if (doquit) 00512 break; 00513 00514 ++n; 00515 } 00516 00517 for (std::map<std::string, rutz::shared_ptr<PatchSet> >::const_iterator 00518 itr = patches.begin(), stop = patches.end(); itr != stop; ++itr) 00519 { 00520 (*itr).second->save(); 00521 } 00522 00523 return 0; 00524 } 00525 00526 // ###################################################################### 00527 /* So things look consistent in everyone's emacs... */ 00528 /* Local Variables: */ 00529 /* mode: c++ */ 00530 /* indent-tabs-mode: nil */ 00531 /* End: */ 00532 00533 #endif // VGAMES_APP_ROI_EXTRACT_C_DEFINED