00001 /*!@file INVT/learnvision.C like ezvision.C but focused on learning */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/INVT/learnvision.C $ 00035 // $Id: learnvision.C 12074 2009-11-24 07:51:51Z itti $ 00036 // 00037 00038 #include "Channels/ChannelBase.H" 00039 #include "Channels/ChannelVisitor.H" 00040 #include "Channels/SingleChannel.H" 00041 #include "Component/ModelManager.H" 00042 #include "Image/Image.H" 00043 #include "Image/MathOps.H" 00044 #include "Image/Pixels.H" 00045 #include "Image/ShapeOps.H" // for rescale() 00046 #include "Image/Transforms.H" // for chamfer34() 00047 #include "Channels/RawVisualCortex.H" 00048 #include "Raster/Raster.H" 00049 #include "Util/SimTime.H" 00050 #include "Util/Types.H" 00051 00052 #include <vector> 00053 #include <cstdio> 00054 00055 namespace 00056 { 00057 00058 class CoeffLearner : public ChannelVisitor 00059 { 00060 public: 00061 CoeffLearner(const Image<byte>& dmap, const double eta, 00062 const bool softmask, 00063 const int inthresh, const int outthresh) 00064 : 00065 itsDmap(dmap), 00066 itsEta(eta), 00067 itsSoftmask(softmask), 00068 itsInThresh(inthresh), 00069 itsOutThresh(outthresh), 00070 itsAbsSumCoeffs() 00071 { 00072 itsAbsSumCoeffs.push_back(0.0); 00073 } 00074 00075 virtual ~CoeffLearner() {} 00076 00077 double absSumCoeffs() const 00078 { 00079 ASSERT(itsAbsSumCoeffs.size() == 1); 00080 return itsAbsSumCoeffs.back(); 00081 } 00082 00083 virtual void visitChannelBase(ChannelBase& chan) 00084 { 00085 LFATAL("don't know how to handle %s", chan.tagName().c_str()); 00086 } 00087 00088 virtual void visitSingleChannel(SingleChannel& chan) 00089 { 00090 if (chan.visualFeature() == FLICKER) 00091 { 00092 // do nothing; we can't "learn" flicker from a single input 00093 // image 00094 return; 00095 } 00096 00097 chan.killCaches(); 00098 /* FIXME 00099 const LevelSpec ls = chan.getLevelSpec(); 00100 00101 for (uint del = ls.delMin(); del <= ls.delMax(); ++del) 00102 for (uint lev = ls.levMin(); lev <= ls.levMax(); ++lev) 00103 { 00104 const uint idx = ls.csToIndex(lev, lev+del); 00105 const Image<float> fmap = chan.getSubmap(idx); 00106 const double oldcoeff = chan.getCoeff(idx); 00107 00108 const double newcoeff = oldcoeff + 00109 itsEta * learningCoeff(fmap, 00110 rescale(itsDmap, fmap.getDims()), 00111 itsSoftmask, 00112 itsInThresh, itsOutThresh); 00113 00114 chan.setCoeff(idx, newcoeff); 00115 00116 LINFO("%s(%d,%d): %f -> %f", 00117 chan.tagName().c_str(), lev, lev+del, 00118 oldcoeff, newcoeff); 00119 } 00120 00121 chan.clampCoeffs(0.0, 100.0); 00122 00123 ASSERT(itsAbsSumCoeffs.size() > 0); 00124 itsAbsSumCoeffs.back() += chan.absSumCoeffs(); 00125 */ 00126 } 00127 00128 virtual void visitComplexChannel(ComplexChannel& chan) 00129 { 00130 chan.killCaches(); 00131 /* FIXME 00132 for (uint i = 0; i < chan.numChans(); ++i) 00133 { 00134 itsAbsSumCoeffs.push_back(0.0); 00135 chan.subChan(i)->accept(*this); 00136 const double wt = 00137 clampValue(chan.getSubchanTotalWeight(i) / chan.numSubmaps(), 00138 0.0, 100.0); 00139 chan.setSubchanTotalWeight(i, wt * chan.numSubmaps()); 00140 itsAbsSumCoeffs.back() *= chan.getSubchanTotalWeight(i); 00141 00142 const double subsum = itsAbsSumCoeffs.back(); 00143 itsAbsSumCoeffs.pop_back(); 00144 00145 itsAbsSumCoeffs.back() += subsum; 00146 } 00147 */ 00148 // I leave to the user the opportunity to normalize the coeffs 00149 // or not after each learning. Some normalization should be done 00150 // at some point to prevent coeff blowout. 00151 } 00152 00153 private: 00154 const Image<byte> itsDmap; 00155 const double itsEta; 00156 const bool itsSoftmask; 00157 const int itsInThresh; 00158 const int itsOutThresh; 00159 00160 std::vector<double> itsAbsSumCoeffs; 00161 }; 00162 00163 00164 class CoeffNormalizer : public ChannelVisitor 00165 { 00166 public: 00167 CoeffNormalizer(const double div) 00168 : 00169 itsDiv(div) 00170 {} 00171 00172 virtual ~CoeffNormalizer() {} 00173 00174 virtual void visitChannelBase(ChannelBase& chan) 00175 { 00176 LFATAL("don't know how to handle %s", chan.tagName().c_str()); 00177 } 00178 00179 virtual void visitSingleChannel(SingleChannel& chan) 00180 { 00181 ////FIXME chan.normalizeCoeffs(itsDiv); 00182 } 00183 00184 virtual void visitComplexChannel(ComplexChannel& chan) 00185 { 00186 for (uint i = 0; i < chan.numChans(); ++i) 00187 chan.subChan(i)->accept(*this); 00188 } 00189 00190 private: 00191 const double itsDiv; 00192 }; 00193 00194 } 00195 00196 //! Basic program to learn feature map weights from static images 00197 /*! This program allows training of the relative weights of feature 00198 maps, given an image and associated binary target mask. */ 00199 int main(const int argc, const char **argv) 00200 { 00201 MYLOGVERB = LOG_INFO; // suppress debug messages 00202 00203 // Instantiate a ModelManager: 00204 ModelManager manager("Attention Model"); 00205 00206 // Instantiate our various ModelComponents: 00207 nub::soft_ref<RawVisualCortex> vcx(new RawVisualCortex(manager)); 00208 manager.addSubComponent(vcx); 00209 00210 // Parse command-line: 00211 if (manager.parseCommandLine(argc, argv, 00212 "<image> <targetMask> <coeffs.pmap> " 00213 "<D|N> <inthresh> <outthresh> <eta>", 00214 7, 7) == false) 00215 return(1); 00216 00217 // do post-command-line configs: 00218 Image< PixRGB<byte> > image = Raster::ReadRGB(manager.getExtraArg(0)); 00219 Image<byte> targetmask = Raster::ReadGray(manager.getExtraArg(1)); 00220 00221 // let's get all our ModelComponent instances started: 00222 manager.start(); 00223 00224 // load the weights: 00225 FILE *f = fopen(manager.getExtraArg(2).c_str(), "r"); 00226 if (f) { 00227 fclose(f); 00228 LINFO("Loading params from %s", manager.getExtraArg(2).c_str()); 00229 /////FIXME vcx->readParamMap(manager.getExtraArg(2).c_str()); 00230 } 00231 00232 // process the input image: 00233 vcx->input(InputFrame::fromRgb(&image)); 00234 00235 // learn: 00236 bool doDistMap = false; 00237 if (manager.getExtraArg(3).c_str()[0] == 'D') doDistMap = true; 00238 int inthresh = manager.getExtraArgAs<int>(4); 00239 int outthresh = manager.getExtraArgAs<int>(5); 00240 double eta = manager.getExtraArgAs<double>(6); 00241 const double softmask = true; 00242 00243 // create a chamfer distance map from the target image -> target 00244 // weighting; result has zeros inside the targetmask and 255 outside, 00245 // graded values in between: 00246 Image<byte> dmap; 00247 if (doDistMap) dmap = chamfer34(targetmask); 00248 else dmap = binaryReverse(targetmask, byte(255)); 00249 00250 CoeffLearner l(dmap, eta, softmask, inthresh, outthresh); 00251 vcx->accept(l); 00252 00253 const double sum = l.absSumCoeffs(); 00254 00255 if (sum < 0.1) 00256 { 00257 LERROR("Sum of coeffs very small (%f). Not normalized.", sum); 00258 } 00259 else 00260 { 00261 const uint nbmaps = vcx->numSubmaps(); 00262 LINFO("Coeff normalization: old sum = %f, nbmaps = %d", 00263 sum, nbmaps); 00264 00265 CoeffNormalizer n(sum / double(nbmaps)); 00266 vcx->accept(n); 00267 } 00268 00269 // save the new params: 00270 LINFO("Saving params to %s", manager.getExtraArg(2).c_str()); 00271 ////FIXME vcx->writeParamMap(manager.getExtraArg(2).c_str()); 00272 00273 // stop all our ModelComponents 00274 manager.stop(); 00275 00276 // all done! 00277 return 0; 00278 } 00279 00280 // ###################################################################### 00281 /* So things look consistent in everyone's emacs... */ 00282 /* Local Variables: */ 00283 /* indent-tabs-mode: nil */ 00284 /* End: */