00001 /*!@file TIGS/TrainingSet.C Manage a paired set of eye position data and input feature vectors */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TIGS/TrainingSet.C $ 00035 // $Id: TrainingSet.C 9412 2008-03-10 23:10:15Z farhan $ 00036 // 00037 00038 #ifndef TIGS_TRAININGSET_C_DEFINED 00039 #define TIGS_TRAININGSET_C_DEFINED 00040 00041 #include "TIGS/TrainingSet.H" 00042 00043 #include "Component/ModelOptionDef.H" 00044 #include "Image/ShapeOps.H" 00045 #include "Media/MediaOpts.H" 00046 #include "Raster/Raster.H" 00047 #include "TIGS/TigsOpts.H" 00048 #include "Util/AllocAux.H" 00049 #include "rutz/trace.h" 00050 00051 // Used by: TrainingSet 00052 static const ModelOptionDef OPT_TrainingSetDecimation = 00053 { MODOPT_ARG(int), "TrainingSetDecimation", &MOC_TIGS, OPTEXP_CORE, 00054 "Factor by which to decimate the number of samples in " 00055 "topdown context training sets", 00056 "tdata-decimation", '\0', "<int>", "1" }; 00057 00058 // Used by: TrainingSet 00059 static const ModelOptionDef OPT_TrainingSetRebalance = 00060 { MODOPT_FLAG, "TrainingSetRebalance", &MOC_TIGS, OPTEXP_CORE, 00061 "Whether to rebalance the training set so that the distribution " 00062 "of eye positions is as flat as possible", 00063 "tdata-rebalance", '\0', "", "false" }; 00064 00065 // Used by: TrainingSet 00066 static const ModelOptionDef OPT_TrainingSetRebalanceThresh = 00067 { MODOPT_ARG(uint), "TrainingSetRebalanceThresh", &MOC_TIGS, OPTEXP_CORE, 00068 "When rebalancing the training set's distribution of eye " 00069 "positions, only include positions for which at least this many " 00070 "samples are available", 00071 "tdata-rebalance-thresh", '\0', "<int>", "10" }; 00072 00073 // Used by: TrainingSet 00074 static const ModelOptionDef OPT_TrainingSetRebalanceGroupSize = 00075 { MODOPT_ARG(uint), "TrainingSetRebalanceGroupSize", &MOC_TIGS, OPTEXP_CORE, 00076 "When rebalancing the training set's distribution of eye " 00077 "positions, pool the samples into this many samples per eye position", 00078 "tdata-rebalance-group-size", '\0', "<int>", "10" }; 00079 00080 TrainingSet::TrainingSet(OptionManager& mgr, const std::string& fx_type) 00081 : 00082 ModelComponent(mgr, "TrainingSet", "TrainingSet"), 00083 itsRawInputDims(&OPT_InputFrameDims, this), 00084 itsDoRebalance(&OPT_TrainingSetRebalance, this), 00085 itsRebalanceThresh(&OPT_TrainingSetRebalanceThresh, this), 00086 itsRebalanceGroupSize(&OPT_TrainingSetRebalanceGroupSize, this), 00087 itsFxType(fx_type), 00088 itsReduction(32), 00089 itsNumFeatures(0), 00090 itsLocked(false), 00091 itsFeatureVec(), 00092 itsPositionVec(), 00093 itsPosGroups(), 00094 itsNumTraining(0), 00095 itsNumLoaded(0), 00096 itsFeatures(), 00097 itsPositions(), 00098 itsDecimationFactor(&OPT_TrainingSetDecimation, this) 00099 {} 00100 00101 Dims TrainingSet::scaledInputDims() const 00102 { 00103 ASSERT(itsRawInputDims.getVal().isNonEmpty()); 00104 ASSERT(itsReduction > 0); 00105 00106 return itsRawInputDims.getVal() / int(itsReduction); 00107 } 00108 00109 size_t TrainingSet::numPositions() const 00110 { 00111 ASSERT(scaledInputDims().isNonEmpty()); 00112 return scaledInputDims().sz(); 00113 } 00114 00115 int TrainingSet::p2p(const int i, const int j) const 00116 { 00117 ASSERT(scaledInputDims().isNonEmpty()); 00118 ASSERT(itsReduction > 0); 00119 return (j / itsReduction) * scaledInputDims().w() + (i / itsReduction); 00120 } 00121 00122 int TrainingSet::p2p(const Point2D<int>& p) const 00123 { 00124 return p2p(p.i, p.j); 00125 } 00126 00127 Image<float> TrainingSet::recordSample(const Point2D<int>& loc, 00128 const Image<float>& features) 00129 { 00130 GVX_TRACE(__PRETTY_FUNCTION__); 00131 00132 ASSERT(!itsLocked); 00133 00134 ASSERT(scaledInputDims().isNonEmpty()); 00135 00136 if (itsNumFeatures == 0) 00137 { 00138 // ok, it's the first time, so let's pick up our number of 00139 // features from the size of the input vector 00140 itsNumFeatures = features.getSize(); 00141 LINFO("%s TrainingSet with %"ZU" features", 00142 itsFxType.c_str(), itsNumFeatures); 00143 } 00144 00145 ASSERT(itsNumFeatures > 0); 00146 ASSERT(size_t(features.getSize()) == itsNumFeatures); 00147 00148 ASSERT(loc.i >= 0); 00149 ASSERT(loc.j >= 0); 00150 00151 ASSERT(itsReduction > 0); 00152 00153 const Point2D<int> locr(loc.i / itsReduction, loc.j / itsReduction); 00154 00155 const size_t i1 = locr.i; 00156 const size_t i0 = locr.i > 0 ? (locr.i-1) : locr.i; 00157 const size_t i2 = locr.i < (scaledInputDims().w() - 1) ? (locr.i+1) : locr.i; 00158 00159 const size_t j1 = locr.j; 00160 const size_t j0 = locr.j > 0 ? (locr.j-1) : locr.j; 00161 const size_t j2 = locr.j < (scaledInputDims().h() - 1) ? (locr.j+1) : locr.j; 00162 00163 const size_t p00 = j0 * scaledInputDims().w() + i0; 00164 const size_t p01 = j1 * scaledInputDims().w() + i0; 00165 const size_t p02 = j2 * scaledInputDims().w() + i0; 00166 00167 const size_t p10 = j0 * scaledInputDims().w() + i1; 00168 const size_t p11 = j1 * scaledInputDims().w() + i1; 00169 const size_t p12 = j2 * scaledInputDims().w() + i1; 00170 00171 const size_t p20 = j0 * scaledInputDims().w() + i2; 00172 const size_t p21 = j1 * scaledInputDims().w() + i2; 00173 const size_t p22 = j2 * scaledInputDims().w() + i2; 00174 00175 const size_t np = this->numPositions(); 00176 00177 for (size_t x = 0; x < np; ++x) 00178 { 00179 itsPositionVec.push_back(0.0f); 00180 00181 float& v = itsPositionVec.back(); 00182 00183 // note these are not a series of "else if", since it's 00184 // possible that more than one of the p00, p01, etc. point to 00185 // the same position, if we are at an edge of the image 00186 if (x == p00) v += 0.25; 00187 if (x == p01) v += 0.5; 00188 if (x == p02) v += 0.25; 00189 00190 if (x == p10) v += 0.5; 00191 if (x == p11) v += 1.0; 00192 if (x == p12) v += 0.5; 00193 00194 if (x == p20) v += 0.25; 00195 if (x == p21) v += 0.5; 00196 if (x == p22) v += 0.25; 00197 } 00198 00199 for (size_t x = 0; x < itsNumFeatures; ++x) 00200 { 00201 itsFeatureVec.push_back(features[x]); 00202 } 00203 00204 ++itsNumTraining; 00205 00206 // return an image showing the eye position array after being 00207 // subjected to our little 3x3 blurring 00208 return Image<float>(&*itsPositionVec.end() - this->numPositions(), 00209 scaledInputDims()); 00210 } 00211 00212 void TrainingSet::load(const std::string& pfx) 00213 { 00214 if (itsDoRebalance.getVal()) 00215 { 00216 this->loadRebalanced(pfx); 00217 return; 00218 } 00219 00220 GVX_TRACE(__PRETTY_FUNCTION__); 00221 00222 const std::string ffile = pfx+"-features.pfm"; 00223 const std::string pfile = pfx+"-positions.pfm"; 00224 00225 Image<float> feat = Raster::ReadFloat(ffile, RASFMT_PFM); 00226 Image<float> pos = Raster::ReadFloat(pfile, RASFMT_PFM); 00227 00228 ASSERT(feat.getHeight() == pos.getHeight()); 00229 00230 if (itsNumFeatures == 0) 00231 { 00232 // ok, it's the first time, so let's pick up our number of 00233 // features from the size of the input vector 00234 itsNumFeatures = feat.getWidth(); 00235 LINFO("%s TrainingSet with %"ZU" features", 00236 itsFxType.c_str(), itsNumFeatures); 00237 } 00238 00239 ASSERT(size_t(feat.getWidth()) == itsNumFeatures); 00240 00241 if (itsDecimationFactor.getVal() > 1) 00242 { 00243 feat = blurAndDecY(feat, itsDecimationFactor.getVal()); 00244 pos = blurAndDecY(pos, itsDecimationFactor.getVal()); 00245 00246 ASSERT(feat.getHeight() == pos.getHeight()); 00247 } 00248 00249 itsFeatureVec.insert(itsFeatureVec.end(), feat.begin(), feat.end()); 00250 itsPositionVec.insert(itsPositionVec.end(), pos.begin(), pos.end()); 00251 00252 itsNumTraining += feat.getHeight(); 00253 00254 ++itsNumLoaded; 00255 00256 // we've loaded external data, so we don't want to allow any more 00257 // internal training samples to come in through recordSample(); 00258 // however, we could still accept more external samples through 00259 // additional load() calls 00260 itsLocked = true; 00261 00262 LINFO("loaded %d samples from training set %s, %d total training samples from %d files", 00263 feat.getHeight(), pfx.c_str(), itsNumTraining, itsNumLoaded); 00264 00265 // we have loaded some huge .pfm files here which are likely to be 00266 // of unusual sizes, so we will only waste memory by trying to cache 00267 // those memory blocks, so let's just release all free memory now: 00268 invt_allocation_release_free_mem(); 00269 } 00270 00271 void TrainingSet::loadRebalanced(const std::string& pfx) 00272 { 00273 GVX_TRACE(__PRETTY_FUNCTION__); 00274 00275 const std::string ffile = pfx+"-features.pfm"; 00276 const std::string pfile = pfx+"-positions.pfm"; 00277 00278 Image<float> feat = Raster::ReadFloat(ffile, RASFMT_PFM); 00279 Image<float> pos = Raster::ReadFloat(pfile, RASFMT_PFM); 00280 00281 ASSERT(feat.getHeight() == pos.getHeight()); 00282 00283 if (itsNumFeatures == 0) 00284 { 00285 // ok, it's the first time, so let's pick up our number of 00286 // features from the size of the input vector 00287 itsNumFeatures = feat.getWidth(); 00288 LINFO("%s TrainingSet with %"ZU" features", 00289 itsFxType.c_str(), itsNumFeatures); 00290 00291 std::vector<PosGroup>().swap(itsPosGroups); 00292 itsPosGroups.resize(pos.getWidth(), 00293 PosGroup(itsRebalanceGroupSize.getVal(), 00294 feat.getWidth(), pos.getWidth())); 00295 00296 ASSERT(itsRebalanceThresh.getVal() >= itsRebalanceGroupSize.getVal()); 00297 } 00298 00299 ASSERT(size_t(feat.getWidth()) == itsNumFeatures); 00300 00301 for (int y = 0; y < pos.getHeight(); ++y) 00302 { 00303 int nmax = 0; 00304 for (int x = 0; x < pos.getWidth(); ++x) 00305 { 00306 const float v = pos.getVal(x, y); 00307 if (v >= 1.0f) 00308 { 00309 ++nmax; 00310 itsPosGroups[x].add(feat.getArrayPtr() + y * feat.getWidth(), 00311 pos.getArrayPtr() + y * pos.getWidth()); 00312 } 00313 } 00314 00315 if (nmax != 1) 00316 LFATAL("nmax = %d (expected nmax = 1) in row %d", nmax, y); 00317 } 00318 00319 std::vector<float>().swap(itsFeatureVec); 00320 std::vector<float>().swap(itsPositionVec); 00321 00322 uint nzero = 0; 00323 uint naccept = 0; 00324 uint nsamp = 0; 00325 Image<byte> bb(20, 15, ZEROS); 00326 itsNumTraining = 0; 00327 for (uint i = 0; i < itsPosGroups.size(); ++i) 00328 { 00329 if (itsPosGroups[i].totalcount == 0) 00330 ++nzero; 00331 if (itsPosGroups[i].totalcount >= itsRebalanceThresh.getVal()) 00332 { 00333 ++naccept; 00334 00335 for (uint k = 0; k < itsPosGroups[i].counts.size(); ++k) 00336 { 00337 const Image<float> f = 00338 itsPosGroups[i].features[k] / itsPosGroups[i].counts[k]; 00339 00340 const Image<float> p = 00341 itsPosGroups[i].positions[k] / itsPosGroups[i].counts[k]; 00342 00343 itsFeatureVec.insert(itsFeatureVec.end(), 00344 f.begin(), f.end()); 00345 itsPositionVec.insert(itsPositionVec.end(), 00346 p.begin(), p.end()); 00347 00348 ++itsNumTraining; 00349 } 00350 bb[i] = 255; 00351 } 00352 nsamp += itsPosGroups[i].totalcount; 00353 } 00354 00355 LINFO("ngroups = %" ZU ", nsamp = %u, naccept = %u, nzero = %u", 00356 itsPosGroups.size(), nsamp, naccept, nzero); 00357 00358 ++itsNumLoaded; 00359 00360 // we've loaded external data, so we don't want to allow any more 00361 // internal training samples to come in through recordSample(); 00362 // however, we could still accept more external samples through 00363 // additional load() calls 00364 itsLocked = true; 00365 00366 LINFO("loaded %d samples from training set %s, %d total training samples from %d files", 00367 feat.getHeight(), pfx.c_str(), itsNumTraining, itsNumLoaded); 00368 00369 // we have loaded some huge .pfm files here which are likely to be 00370 // of unusual sizes, so we will only waste memory by trying to cache 00371 // those memory blocks, so let's just release all free memory now: 00372 invt_allocation_release_free_mem(); 00373 } 00374 00375 void TrainingSet::save(const std::string& pfx) 00376 { 00377 GVX_TRACE(__PRETTY_FUNCTION__); 00378 00379 const std::string ffile = pfx+"-features.pfm"; 00380 const std::string pfile = pfx+"-positions.pfm"; 00381 00382 if (Raster::fileExists(ffile)) 00383 LINFO("save skipped; file already exists: %s", ffile.c_str()); 00384 else 00385 Raster::WriteFloat(this->getFeatures(), FLOAT_NORM_PRESERVE, ffile, RASFMT_PFM); 00386 00387 if (Raster::fileExists(pfile)) 00388 LINFO("save skipped; file already exists: %s", pfile.c_str()); 00389 else 00390 Raster::WriteFloat(this->getPositions(), FLOAT_NORM_PRESERVE, pfile, RASFMT_PFM); 00391 00392 LINFO("saved training set %s", pfx.c_str()); 00393 } 00394 00395 Image<float> TrainingSet::getFeatures() const 00396 { 00397 ASSERT(itsNumFeatures > 0); 00398 00399 if (itsFeatures.getHeight() != itsNumTraining) 00400 { 00401 itsFeatures = Image<float>(&itsFeatureVec[0], 00402 itsNumFeatures, itsNumTraining); 00403 } 00404 00405 return itsFeatures; 00406 } 00407 00408 Image<float> TrainingSet::getPositions() const 00409 { 00410 if (itsPositions.getHeight() != itsNumTraining) 00411 { 00412 itsPositions = Image<float>(&itsPositionVec[0], 00413 this->numPositions(), itsNumTraining); 00414 } 00415 00416 return itsPositions; 00417 } 00418 00419 uint TrainingSet::inputReduction() const 00420 { 00421 return itsReduction; 00422 } 00423 00424 const std::string& TrainingSet::fxType() const 00425 { 00426 return itsFxType; 00427 } 00428 00429 // ###################################################################### 00430 /* So things look consistent in everyone's emacs... */ 00431 /* Local Variables: */ 00432 /* mode: c++ */ 00433 /* indent-tabs-mode: nil */ 00434 /* End: */ 00435 00436 #endif // TIGS_TRAININGSET_C_DEFINED