TrainingSet.C

Go to the documentation of this file.
00001 /*!@file TIGS/TrainingSet.C Manage a paired set of eye position data and input feature vectors */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/TIGS/TrainingSet.C $
00035 // $Id: TrainingSet.C 9412 2008-03-10 23:10:15Z farhan $
00036 //
00037 
00038 #ifndef TIGS_TRAININGSET_C_DEFINED
00039 #define TIGS_TRAININGSET_C_DEFINED
00040 
00041 #include "TIGS/TrainingSet.H"
00042 
00043 #include "Component/ModelOptionDef.H"
00044 #include "Image/ShapeOps.H"
00045 #include "Media/MediaOpts.H"
00046 #include "Raster/Raster.H"
00047 #include "TIGS/TigsOpts.H"
00048 #include "Util/AllocAux.H"
00049 #include "rutz/trace.h"
00050 
00051 // Used by: TrainingSet
00052 static const ModelOptionDef OPT_TrainingSetDecimation =
00053   { MODOPT_ARG(int), "TrainingSetDecimation", &MOC_TIGS, OPTEXP_CORE,
00054     "Factor by which to decimate the number of samples in "
00055     "topdown context training sets",
00056     "tdata-decimation", '\0', "<int>", "1" };
00057 
00058 // Used by: TrainingSet
00059 static const ModelOptionDef OPT_TrainingSetRebalance =
00060   { MODOPT_FLAG, "TrainingSetRebalance", &MOC_TIGS, OPTEXP_CORE,
00061     "Whether to rebalance the training set so that the distribution "
00062     "of eye positions is as flat as possible",
00063     "tdata-rebalance", '\0', "", "false" };
00064 
00065 // Used by: TrainingSet
00066 static const ModelOptionDef OPT_TrainingSetRebalanceThresh =
00067   { MODOPT_ARG(uint), "TrainingSetRebalanceThresh", &MOC_TIGS, OPTEXP_CORE,
00068     "When rebalancing the training set's distribution of eye "
00069     "positions, only include positions for which at least this many "
00070     "samples are available",
00071     "tdata-rebalance-thresh", '\0', "<int>", "10" };
00072 
00073 // Used by: TrainingSet
00074 static const ModelOptionDef OPT_TrainingSetRebalanceGroupSize =
00075   { MODOPT_ARG(uint), "TrainingSetRebalanceGroupSize", &MOC_TIGS, OPTEXP_CORE,
00076     "When rebalancing the training set's distribution of eye "
00077     "positions, pool the samples into this many samples per eye position",
00078     "tdata-rebalance-group-size", '\0', "<int>", "10" };
00079 
00080 TrainingSet::TrainingSet(OptionManager& mgr, const std::string& fx_type)
00081   :
00082   ModelComponent(mgr, "TrainingSet", "TrainingSet"),
00083   itsRawInputDims(&OPT_InputFrameDims, this),
00084   itsDoRebalance(&OPT_TrainingSetRebalance, this),
00085   itsRebalanceThresh(&OPT_TrainingSetRebalanceThresh, this),
00086   itsRebalanceGroupSize(&OPT_TrainingSetRebalanceGroupSize, this),
00087   itsFxType(fx_type),
00088   itsReduction(32),
00089   itsNumFeatures(0),
00090   itsLocked(false),
00091   itsFeatureVec(),
00092   itsPositionVec(),
00093   itsPosGroups(),
00094   itsNumTraining(0),
00095   itsNumLoaded(0),
00096   itsFeatures(),
00097   itsPositions(),
00098   itsDecimationFactor(&OPT_TrainingSetDecimation, this)
00099 {}
00100 
00101 Dims TrainingSet::scaledInputDims() const
00102 {
00103   ASSERT(itsRawInputDims.getVal().isNonEmpty());
00104   ASSERT(itsReduction > 0);
00105 
00106   return itsRawInputDims.getVal() / int(itsReduction);
00107 }
00108 
00109 size_t TrainingSet::numPositions() const
00110 {
00111   ASSERT(scaledInputDims().isNonEmpty());
00112   return scaledInputDims().sz();
00113 }
00114 
00115 int TrainingSet::p2p(const int i, const int j) const
00116 {
00117   ASSERT(scaledInputDims().isNonEmpty());
00118   ASSERT(itsReduction > 0);
00119   return (j / itsReduction) * scaledInputDims().w() + (i / itsReduction);
00120 }
00121 
00122 int TrainingSet::p2p(const Point2D<int>& p) const
00123 {
00124   return p2p(p.i, p.j);
00125 }
00126 
00127 Image<float> TrainingSet::recordSample(const Point2D<int>& loc,
00128                                        const Image<float>& features)
00129 {
00130   GVX_TRACE(__PRETTY_FUNCTION__);
00131 
00132   ASSERT(!itsLocked);
00133 
00134   ASSERT(scaledInputDims().isNonEmpty());
00135 
00136   if (itsNumFeatures == 0)
00137     {
00138       // ok, it's the first time, so let's pick up our number of
00139       // features from the size of the input vector
00140       itsNumFeatures = features.getSize();
00141       LINFO("%s TrainingSet with %"ZU" features",
00142             itsFxType.c_str(), itsNumFeatures);
00143     }
00144 
00145   ASSERT(itsNumFeatures > 0);
00146   ASSERT(size_t(features.getSize()) == itsNumFeatures);
00147 
00148   ASSERT(loc.i >= 0);
00149   ASSERT(loc.j >= 0);
00150 
00151   ASSERT(itsReduction > 0);
00152 
00153   const Point2D<int> locr(loc.i / itsReduction, loc.j / itsReduction);
00154 
00155   const size_t i1 = locr.i;
00156   const size_t i0 = locr.i > 0 ? (locr.i-1) : locr.i;
00157   const size_t i2 = locr.i < (scaledInputDims().w() - 1) ? (locr.i+1) : locr.i;
00158 
00159   const size_t j1 = locr.j;
00160   const size_t j0 = locr.j > 0 ? (locr.j-1) : locr.j;
00161   const size_t j2 = locr.j < (scaledInputDims().h() - 1) ? (locr.j+1) : locr.j;
00162 
00163   const size_t p00 = j0 * scaledInputDims().w() + i0;
00164   const size_t p01 = j1 * scaledInputDims().w() + i0;
00165   const size_t p02 = j2 * scaledInputDims().w() + i0;
00166 
00167   const size_t p10 = j0 * scaledInputDims().w() + i1;
00168   const size_t p11 = j1 * scaledInputDims().w() + i1;
00169   const size_t p12 = j2 * scaledInputDims().w() + i1;
00170 
00171   const size_t p20 = j0 * scaledInputDims().w() + i2;
00172   const size_t p21 = j1 * scaledInputDims().w() + i2;
00173   const size_t p22 = j2 * scaledInputDims().w() + i2;
00174 
00175   const size_t np = this->numPositions();
00176 
00177   for (size_t x = 0; x < np; ++x)
00178     {
00179       itsPositionVec.push_back(0.0f);
00180 
00181       float& v = itsPositionVec.back();
00182 
00183       // note these are not a series of "else if", since it's
00184       // possible that more than one of the p00, p01, etc. point to
00185       // the same position, if we are at an edge of the image
00186       if (x == p00) v += 0.25;
00187       if (x == p01) v += 0.5;
00188       if (x == p02) v += 0.25;
00189 
00190       if (x == p10) v += 0.5;
00191       if (x == p11) v += 1.0;
00192       if (x == p12) v += 0.5;
00193 
00194       if (x == p20) v += 0.25;
00195       if (x == p21) v += 0.5;
00196       if (x == p22) v += 0.25;
00197     }
00198 
00199   for (size_t x = 0; x < itsNumFeatures; ++x)
00200     {
00201       itsFeatureVec.push_back(features[x]);
00202     }
00203 
00204   ++itsNumTraining;
00205 
00206   // return an image showing the eye position array after being
00207   // subjected to our little 3x3 blurring
00208   return Image<float>(&*itsPositionVec.end() - this->numPositions(),
00209                       scaledInputDims());
00210 }
00211 
00212 void TrainingSet::load(const std::string& pfx)
00213 {
00214   if (itsDoRebalance.getVal())
00215     {
00216       this->loadRebalanced(pfx);
00217       return;
00218     }
00219 
00220   GVX_TRACE(__PRETTY_FUNCTION__);
00221 
00222   const std::string ffile = pfx+"-features.pfm";
00223   const std::string pfile = pfx+"-positions.pfm";
00224 
00225   Image<float> feat = Raster::ReadFloat(ffile, RASFMT_PFM);
00226   Image<float> pos = Raster::ReadFloat(pfile, RASFMT_PFM);
00227 
00228   ASSERT(feat.getHeight() == pos.getHeight());
00229 
00230   if (itsNumFeatures == 0)
00231     {
00232       // ok, it's the first time, so let's pick up our number of
00233       // features from the size of the input vector
00234       itsNumFeatures = feat.getWidth();
00235       LINFO("%s TrainingSet with %"ZU" features",
00236             itsFxType.c_str(), itsNumFeatures);
00237     }
00238 
00239   ASSERT(size_t(feat.getWidth()) == itsNumFeatures);
00240 
00241   if (itsDecimationFactor.getVal() > 1)
00242     {
00243       feat = blurAndDecY(feat, itsDecimationFactor.getVal());
00244       pos = blurAndDecY(pos, itsDecimationFactor.getVal());
00245 
00246       ASSERT(feat.getHeight() == pos.getHeight());
00247     }
00248 
00249   itsFeatureVec.insert(itsFeatureVec.end(), feat.begin(), feat.end());
00250   itsPositionVec.insert(itsPositionVec.end(), pos.begin(), pos.end());
00251 
00252   itsNumTraining += feat.getHeight();
00253 
00254   ++itsNumLoaded;
00255 
00256   // we've loaded external data, so we don't want to allow any more
00257   // internal training samples to come in through recordSample();
00258   // however, we could still accept more external samples through
00259   // additional load() calls
00260   itsLocked = true;
00261 
00262   LINFO("loaded %d samples from training set %s, %d total training samples from %d files",
00263         feat.getHeight(), pfx.c_str(), itsNumTraining, itsNumLoaded);
00264 
00265   // we have loaded some huge .pfm files here which are likely to be
00266   // of unusual sizes, so we will only waste memory by trying to cache
00267   // those memory blocks, so let's just release all free memory now:
00268   invt_allocation_release_free_mem();
00269 }
00270 
00271 void TrainingSet::loadRebalanced(const std::string& pfx)
00272 {
00273   GVX_TRACE(__PRETTY_FUNCTION__);
00274 
00275   const std::string ffile = pfx+"-features.pfm";
00276   const std::string pfile = pfx+"-positions.pfm";
00277 
00278   Image<float> feat = Raster::ReadFloat(ffile, RASFMT_PFM);
00279   Image<float> pos = Raster::ReadFloat(pfile, RASFMT_PFM);
00280 
00281   ASSERT(feat.getHeight() == pos.getHeight());
00282 
00283   if (itsNumFeatures == 0)
00284     {
00285       // ok, it's the first time, so let's pick up our number of
00286       // features from the size of the input vector
00287       itsNumFeatures = feat.getWidth();
00288       LINFO("%s TrainingSet with %"ZU" features",
00289             itsFxType.c_str(), itsNumFeatures);
00290 
00291       std::vector<PosGroup>().swap(itsPosGroups);
00292       itsPosGroups.resize(pos.getWidth(),
00293                           PosGroup(itsRebalanceGroupSize.getVal(),
00294                                    feat.getWidth(), pos.getWidth()));
00295 
00296       ASSERT(itsRebalanceThresh.getVal() >= itsRebalanceGroupSize.getVal());
00297     }
00298 
00299   ASSERT(size_t(feat.getWidth()) == itsNumFeatures);
00300 
00301   for (int y = 0; y < pos.getHeight(); ++y)
00302     {
00303       int nmax = 0;
00304       for (int x = 0; x < pos.getWidth(); ++x)
00305         {
00306           const float v = pos.getVal(x, y);
00307           if (v >= 1.0f)
00308             {
00309               ++nmax;
00310               itsPosGroups[x].add(feat.getArrayPtr() + y * feat.getWidth(),
00311                                   pos.getArrayPtr() + y * pos.getWidth());
00312             }
00313         }
00314 
00315       if (nmax != 1)
00316         LFATAL("nmax = %d (expected nmax = 1) in row %d", nmax, y);
00317     }
00318 
00319   std::vector<float>().swap(itsFeatureVec);
00320   std::vector<float>().swap(itsPositionVec);
00321 
00322   uint nzero = 0;
00323   uint naccept = 0;
00324   uint nsamp = 0;
00325   Image<byte> bb(20, 15, ZEROS);
00326   itsNumTraining = 0;
00327   for (uint i = 0; i < itsPosGroups.size(); ++i)
00328     {
00329       if (itsPosGroups[i].totalcount == 0)
00330         ++nzero;
00331       if (itsPosGroups[i].totalcount >= itsRebalanceThresh.getVal())
00332         {
00333           ++naccept;
00334 
00335           for (uint k = 0; k < itsPosGroups[i].counts.size(); ++k)
00336             {
00337               const Image<float> f =
00338                 itsPosGroups[i].features[k] / itsPosGroups[i].counts[k];
00339 
00340               const Image<float> p =
00341                 itsPosGroups[i].positions[k] / itsPosGroups[i].counts[k];
00342 
00343               itsFeatureVec.insert(itsFeatureVec.end(),
00344                                    f.begin(), f.end());
00345               itsPositionVec.insert(itsPositionVec.end(),
00346                                     p.begin(), p.end());
00347 
00348               ++itsNumTraining;
00349             }
00350           bb[i] = 255;
00351         }
00352       nsamp += itsPosGroups[i].totalcount;
00353     }
00354 
00355   LINFO("ngroups = %" ZU ", nsamp = %u, naccept = %u, nzero = %u",
00356         itsPosGroups.size(), nsamp, naccept, nzero);
00357 
00358   ++itsNumLoaded;
00359 
00360   // we've loaded external data, so we don't want to allow any more
00361   // internal training samples to come in through recordSample();
00362   // however, we could still accept more external samples through
00363   // additional load() calls
00364   itsLocked = true;
00365 
00366   LINFO("loaded %d samples from training set %s, %d total training samples from %d files",
00367         feat.getHeight(), pfx.c_str(), itsNumTraining, itsNumLoaded);
00368 
00369   // we have loaded some huge .pfm files here which are likely to be
00370   // of unusual sizes, so we will only waste memory by trying to cache
00371   // those memory blocks, so let's just release all free memory now:
00372   invt_allocation_release_free_mem();
00373 }
00374 
00375 void TrainingSet::save(const std::string& pfx)
00376 {
00377   GVX_TRACE(__PRETTY_FUNCTION__);
00378 
00379   const std::string ffile = pfx+"-features.pfm";
00380   const std::string pfile = pfx+"-positions.pfm";
00381 
00382   if (Raster::fileExists(ffile))
00383     LINFO("save skipped; file already exists: %s", ffile.c_str());
00384   else
00385     Raster::WriteFloat(this->getFeatures(), FLOAT_NORM_PRESERVE, ffile, RASFMT_PFM);
00386 
00387   if (Raster::fileExists(pfile))
00388     LINFO("save skipped; file already exists: %s", pfile.c_str());
00389   else
00390     Raster::WriteFloat(this->getPositions(), FLOAT_NORM_PRESERVE, pfile, RASFMT_PFM);
00391 
00392   LINFO("saved training set %s", pfx.c_str());
00393 }
00394 
00395 Image<float> TrainingSet::getFeatures() const
00396 {
00397   ASSERT(itsNumFeatures > 0);
00398 
00399   if (itsFeatures.getHeight() != itsNumTraining)
00400     {
00401       itsFeatures = Image<float>(&itsFeatureVec[0],
00402                                  itsNumFeatures, itsNumTraining);
00403     }
00404 
00405   return itsFeatures;
00406 }
00407 
00408 Image<float> TrainingSet::getPositions() const
00409 {
00410   if (itsPositions.getHeight() != itsNumTraining)
00411     {
00412       itsPositions = Image<float>(&itsPositionVec[0],
00413                                   this->numPositions(), itsNumTraining);
00414     }
00415 
00416   return itsPositions;
00417 }
00418 
00419 uint TrainingSet::inputReduction() const
00420 {
00421   return itsReduction;
00422 }
00423 
00424 const std::string& TrainingSet::fxType() const
00425 {
00426   return itsFxType;
00427 }
00428 
00429 // ######################################################################
00430 /* So things look consistent in everyone's emacs... */
00431 /* Local Variables: */
00432 /* mode: c++ */
00433 /* indent-tabs-mode: nil */
00434 /* End: */
00435 
00436 #endif // TIGS_TRAININGSET_C_DEFINED