learnvision.C

Go to the documentation of this file.
00001 /*!@file INVT/learnvision.C  like ezvision.C but focused on learning */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/learnvision.C $
00035 // $Id: learnvision.C 12074 2009-11-24 07:51:51Z itti $
00036 //
00037 
00038 #include "Channels/ChannelBase.H"
00039 #include "Channels/ChannelVisitor.H"
00040 #include "Channels/SingleChannel.H"
00041 #include "Component/ModelManager.H"
00042 #include "Image/Image.H"
00043 #include "Image/MathOps.H"
00044 #include "Image/Pixels.H"
00045 #include "Image/ShapeOps.H" // for rescale()
00046 #include "Image/Transforms.H" // for chamfer34()
00047 #include "Channels/RawVisualCortex.H"
00048 #include "Raster/Raster.H"
00049 #include "Util/SimTime.H"
00050 #include "Util/Types.H"
00051 
00052 #include <vector>
00053 #include <cstdio>
00054 
00055 namespace
00056 {
00057 
00058   class CoeffLearner : public ChannelVisitor
00059   {
00060   public:
00061     CoeffLearner(const Image<byte>& dmap, const double eta,
00062                  const bool softmask,
00063                  const int inthresh, const int outthresh)
00064       :
00065       itsDmap(dmap),
00066       itsEta(eta),
00067       itsSoftmask(softmask),
00068       itsInThresh(inthresh),
00069       itsOutThresh(outthresh),
00070       itsAbsSumCoeffs()
00071     {
00072       itsAbsSumCoeffs.push_back(0.0);
00073     }
00074 
00075     virtual ~CoeffLearner() {}
00076 
00077     double absSumCoeffs() const
00078     {
00079       ASSERT(itsAbsSumCoeffs.size() == 1);
00080       return itsAbsSumCoeffs.back();
00081     }
00082 
00083     virtual void visitChannelBase(ChannelBase& chan)
00084     {
00085       LFATAL("don't know how to handle %s", chan.tagName().c_str());
00086     }
00087 
00088     virtual void visitSingleChannel(SingleChannel& chan)
00089     {
00090       if (chan.visualFeature() == FLICKER)
00091         {
00092           // do nothing; we can't "learn" flicker from a single input
00093           // image
00094           return;
00095         }
00096 
00097       chan.killCaches();
00098       /* FIXME
00099       const LevelSpec ls = chan.getLevelSpec();
00100 
00101       for (uint del = ls.delMin(); del <= ls.delMax(); ++del)
00102         for (uint lev = ls.levMin(); lev <= ls.levMax(); ++lev)
00103           {
00104             const uint idx = ls.csToIndex(lev, lev+del);
00105             const Image<float> fmap = chan.getSubmap(idx);
00106             const double oldcoeff = chan.getCoeff(idx);
00107 
00108             const double newcoeff = oldcoeff +
00109               itsEta * learningCoeff(fmap,
00110                                      rescale(itsDmap, fmap.getDims()),
00111                                      itsSoftmask,
00112                                      itsInThresh, itsOutThresh);
00113 
00114             chan.setCoeff(idx, newcoeff);
00115 
00116             LINFO("%s(%d,%d): %f -> %f",
00117                   chan.tagName().c_str(), lev, lev+del,
00118                   oldcoeff, newcoeff);
00119           }
00120 
00121       chan.clampCoeffs(0.0, 100.0);
00122 
00123       ASSERT(itsAbsSumCoeffs.size() > 0);
00124       itsAbsSumCoeffs.back() += chan.absSumCoeffs();
00125       */
00126     }
00127 
00128     virtual void visitComplexChannel(ComplexChannel& chan)
00129     {
00130       chan.killCaches();
00131       /* FIXME
00132       for (uint i = 0; i < chan.numChans(); ++i)
00133         {
00134           itsAbsSumCoeffs.push_back(0.0);
00135           chan.subChan(i)->accept(*this);
00136           const double wt =
00137             clampValue(chan.getSubchanTotalWeight(i) / chan.numSubmaps(),
00138                        0.0, 100.0);
00139           chan.setSubchanTotalWeight(i, wt * chan.numSubmaps());
00140           itsAbsSumCoeffs.back() *= chan.getSubchanTotalWeight(i);
00141 
00142           const double subsum = itsAbsSumCoeffs.back();
00143           itsAbsSumCoeffs.pop_back();
00144 
00145           itsAbsSumCoeffs.back() += subsum;
00146         }
00147       */
00148       // I leave to the user the opportunity to normalize the coeffs
00149       // or not after each learning. Some normalization should be done
00150       // at some point to prevent coeff blowout.
00151     }
00152 
00153   private:
00154     const Image<byte> itsDmap;
00155     const double itsEta;
00156     const bool itsSoftmask;
00157     const int itsInThresh;
00158     const int itsOutThresh;
00159 
00160     std::vector<double> itsAbsSumCoeffs;
00161   };
00162 
00163 
00164   class CoeffNormalizer : public ChannelVisitor
00165   {
00166   public:
00167     CoeffNormalizer(const double div)
00168       :
00169       itsDiv(div)
00170     {}
00171 
00172     virtual ~CoeffNormalizer() {}
00173 
00174     virtual void visitChannelBase(ChannelBase& chan)
00175     {
00176       LFATAL("don't know how to handle %s", chan.tagName().c_str());
00177     }
00178 
00179     virtual void visitSingleChannel(SingleChannel& chan)
00180     {
00181       ////FIXME      chan.normalizeCoeffs(itsDiv);
00182     }
00183 
00184     virtual void visitComplexChannel(ComplexChannel& chan)
00185     {
00186       for (uint i = 0; i < chan.numChans(); ++i)
00187         chan.subChan(i)->accept(*this);
00188     }
00189 
00190   private:
00191     const double itsDiv;
00192   };
00193 
00194 }
00195 
00196 //! Basic program to learn feature map weights from static images
00197 /*! This program allows training of the relative weights of feature
00198   maps, given an image and associated binary target mask. */
00199 int main(const int argc, const char **argv)
00200 {
00201   MYLOGVERB = LOG_INFO;  // suppress debug messages
00202 
00203   // Instantiate a ModelManager:
00204   ModelManager manager("Attention Model");
00205 
00206   // Instantiate our various ModelComponents:
00207   nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager));
00208   manager.addSubComponent(vcx);
00209 
00210   // Parse command-line:
00211   if (manager.parseCommandLine(argc, argv,
00212                                "<image> <targetMask> <coeffs.pmap> "
00213                                "<D|N> <inthresh> <outthresh> <eta>",
00214                                7, 7) == false)
00215     return(1);
00216 
00217   // do post-command-line configs:
00218   Image< PixRGB<byte> > image = Raster::ReadRGB(manager.getExtraArg(0));
00219   Image<byte> targetmask = Raster::ReadGray(manager.getExtraArg(1));
00220 
00221   // let's get all our ModelComponent instances started:
00222   manager.start();
00223 
00224   // load the weights:
00225   FILE *f = fopen(manager.getExtraArg(2).c_str(), "r");
00226   if (f) {
00227     fclose(f);
00228     LINFO("Loading params from %s", manager.getExtraArg(2).c_str());
00229     /////FIXME    vcx->readParamMap(manager.getExtraArg(2).c_str());
00230   }
00231 
00232   // process the input image:
00233   vcx->input(InputFrame::fromRgb(&image));
00234 
00235   // learn:
00236   bool doDistMap = false;
00237   if (manager.getExtraArg(3).c_str()[0] == 'D') doDistMap = true;
00238   int inthresh = manager.getExtraArgAs<int>(4);
00239   int outthresh = manager.getExtraArgAs<int>(5);
00240   double eta = manager.getExtraArgAs<double>(6);
00241   const double softmask = true;
00242 
00243   // create a chamfer distance map from the target image -> target
00244   // weighting; result has zeros inside the targetmask and 255 outside,
00245   // graded values in between:
00246   Image<byte> dmap;
00247   if (doDistMap) dmap = chamfer34(targetmask);
00248   else dmap = binaryReverse(targetmask, byte(255));
00249 
00250   CoeffLearner l(dmap, eta, softmask, inthresh, outthresh);
00251   vcx->accept(l);
00252 
00253   const double sum = l.absSumCoeffs();
00254 
00255   if (sum < 0.1)
00256     {
00257       LERROR("Sum of coeffs very small (%f). Not normalized.", sum);
00258     }
00259   else
00260     {
00261       const uint nbmaps = vcx->numSubmaps();
00262       LINFO("Coeff normalization: old sum = %f, nbmaps = %d",
00263             sum, nbmaps);
00264 
00265       CoeffNormalizer n(sum / double(nbmaps));
00266       vcx->accept(n);
00267     }
00268 
00269   // save the new params:
00270   LINFO("Saving params to %s", manager.getExtraArg(2).c_str());
00271   ////FIXME  vcx->writeParamMap(manager.getExtraArg(2).c_str());
00272 
00273   // stop all our ModelComponents
00274   manager.stop();
00275 
00276   // all done!
00277   return 0;
00278 }
00279 
00280 // ######################################################################
00281 /* So things look consistent in everyone's emacs... */
00282 /* Local Variables: */
00283 /* indent-tabs-mode: nil */
00284 /* End: */