SoxChannel.C

Go to the documentation of this file.
00001 /*!@file Channels/SoxChannel.C Shortrange Orientation Interactions channel */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters@klab.caltech.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Channels/SoxChannel.C $
00035 // $Id: SoxChannel.C 8857 2007-10-18 23:38:04Z rjpeters $
00036 //
00037 
00038 #include "Channels/SoxChannel.H"
00039 
00040 #include "Channels/ChannelOpts.H"
00041 #include "Component/ModelOptionDef.H"
00042 #include "Component/OptionManager.H"
00043 #include "Component/ParamMap.H"
00044 #include "Image/MathOps.H" // for toPower
00045 #include "Image/ShapeOps.H" // for rescale
00046 #include "Image/fancynorm.H"
00047 #include "Util/Assert.H"
00048 #include "Util/MathFunctions.H"
00049 #include "Util/log.H"
00050 
00051 #include <algorithm> // for std::swap
00052 #include <cmath> // for exp
00053 
// Command-line option definitions for the SoxChannel (short-range
// orientation interactions). Each definition below supplies, in order:
// the option type, its parameter name, its category, its exposure
// level, a help string, the long command-line name, the short
// command-line character ('\0' here, i.e. none), a description of the
// accepted value, and the default value as a string.

// Width of the inhibitory pool along the orientation dimension
// ('Sigma_theta'); default "20.0".
static const ModelOptionDef OPT_SoxThetaPoolWidth =
  { MODOPT_ARG(double), "SoxThetaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE,
    "The width of the inhibitory pool in the orientation dimension in the "
    "SoxChannel (short-range orientation interactions), corresponding to "
    "'Sigma_theta' in section 2.7 of VisRes2005.",
    "sox-theta-pool-width", '\0', "<double>", "20.0" };

// Width of the inhibitory pool along the frequency dimension
// ('Sigma_omega'); default "0.001".
static const ModelOptionDef OPT_SoxOmegaPoolWidth =
  { MODOPT_ARG(double), "SoxOmegaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE,
    "The width of the inhibitory pool in the frequency dimension in the "
    "SoxChannel (short-range orientation interactions), corresponding to "
    "'Sigma_omega' in section 2.7 of VisRes2005.",
    "sox-omega-pool-width", '\0', "<double>", "0.001" };

// Exponent applied to the inhibitory contributions ('delta'); default
// "1.5".
static const ModelOptionDef OPT_SoxInhibExponent =
  { MODOPT_ARG(double), "SoxInhibExponent", &MOC_CHANNEL, OPTEXP_CORE,
    "The exponent to which inhibitory contributions are raised in the "
    "SoxChannel (short-range orientation interactions), corresponding to "
    "'delta' section 2.7 of VisRes2005.",
    "sox-inhib-exponent", '\0', "<double>", "1.5" };

// Exponent applied to the excitatory contribution ('gamma'); default
// "2.0".
static const ModelOptionDef OPT_SoxExcitExponent =
  { MODOPT_ARG(double), "SoxExcitExponent", &MOC_CHANNEL, OPTEXP_CORE,
    "The exponent to which excitatory contributions are raised in the "
    "SoxChannel (short-range orientation interactions), corresponding to "
    "'gamma' section 2.7 of VisRes2005.",
    "sox-excit-exponent", '\0', "<double>", "2.0" };

// Semi-saturation constant ('S') of the divisive inhibition term;
// default "1.0".
static const ModelOptionDef OPT_SoxSemiSaturation =
  { MODOPT_ARG(double), "SoxSemiSaturation", &MOC_CHANNEL, OPTEXP_CORE,
    "The semi-saturation constant added to the divisive inhibition term "
    "in the SoxChannel (short-range orientation interactions), corresponding "
    "to 'S' section 2.7 of VisRes2005.",
    "sox-semi-saturation", '\0', "<double>", "1.0" };

// Weight threshold below which an inhibitory term is skipped; default
// "0.00001".
static const ModelOptionDef OPT_SoxCutoff =
  { MODOPT_ARG(double), "SoxCutoff", &MOC_CHANNEL, OPTEXP_CORE,
    "As a performance optimization, in the SoxChannel (short-range "
    "orientation interactions), inhibitory terms will be dropped from "
    "further consideration if their weight ('W' in section 2.7 of "
    "VisRes2005) is less than this cutoff value.",
    "sox-cutoff", '\0', "<double>", "0.00001" };

// Alias option "--sox-model-0037": expands to the fixed list of options
// given in the last field (number of orientations plus all six Sox
// parameters).
static const ModelOptionDef OPT_ALIASsoxModel0037 =
  { MODOPT_ALIAS, "ALIASsoxModel0037", &MOC_ALIAS, OPTEXP_CORE,
    "Sox model #0037",
    "sox-model-0037", '\0', "",
    "--num-orient=6 "
    "--sox-cutoff=1e-4 "
    "--sox-excit-exponent=4.0 "
    "--sox-inhib-exponent=3.5 "
    "--sox-omega-pool-width=0.8 "
    "--sox-semi-saturation=1.0 "
    "--sox-theta-pool-width=20.0 "
  };
00109 
00110 // ######################################################################
00111 SoxChannel::SoxChannel(OptionManager& mgr) :
00112   ComplexChannel(mgr, "Sox", "sox", ORI),
00113   itsNumOrients(&OPT_NumOrientations, this, 6, USE_MY_VAL),
00114   thetaPoolWidth(&OPT_SoxThetaPoolWidth, this),
00115   omegaPoolWidth(&OPT_SoxOmegaPoolWidth, this),
00116   inhibExponent(&OPT_SoxInhibExponent, this),
00117   excitExponent(&OPT_SoxExcitExponent, this),
00118   semiSaturation(&OPT_SoxSemiSaturation, this),
00119   cutoff(&OPT_SoxCutoff, this)
00120 {
00121   mgr.requestOptionAlias(&OPT_ALIASsoxModel0037);
00122 
00123   // let's build our channels; we may have to re-build them if
00124   // itsNumOrient get changed on us before we start():
00125   buildSubChans();
00126 }
00127 
00128 // ######################################################################
00129 void SoxChannel::buildSubChans()
00130 {
00131   // kill any subchans we may have had...
00132   this->removeAllSubChans();
00133 
00134   // let's instantiate our Gabor subchannels now that we know how many
00135   // we want. They will inherit the current values (typically
00136   // post-command-line parsing) of all their options as they are
00137   // constructed:
00138   LINFO("Using %d orientations spanning [0..180]deg", itsNumOrients.getVal());
00139   for (uint ori = 0; ori < itsNumOrients.getVal(); ++ori)
00140     {
00141       nub::ref<GaborChannel> chan
00142         (makeSharedComp
00143          (new GaborChannel(getManager(),
00144                            ori, 180.0 * double(ori) /
00145                            double(itsNumOrients.getVal()))));
00146 
00147       chan->setComputeFullPyramid(true);
00148 
00149       this->addSubChan(chan);
00150 
00151       // let's export options on the newly built channel:
00152       chan->exportOptions(MC_RECURSE);
00153     }
00154 }
00155 
00156 // ######################################################################
00157 void SoxChannel::paramChanged(ModelParamBase* const param,
00158                               const bool valueChanged,
00159                               ParamClient::ChangeStatus* status)
00160 {
00161   ComplexChannel::paramChanged(param, valueChanged, status);
00162 
00163   // if the param is our number of orientations and it has become
00164   // different from our number of channels, let's reconfigure:
00165   if (param == &itsNumOrients &&
00166       numChans() != itsNumOrients.getVal())
00167     buildSubChans();
00168 }
00169 
00170 // ######################################################################
00171 nub::ref<GaborChannel> SoxChannel::gabor(const uint idx) const
00172 { return dynCast<GaborChannel>(subChan(idx)); }
00173 
00174 // ######################################################################
00175 SoxChannel::~SoxChannel()
00176 {  }
00177 
00178 // ######################################################################
00179 uint SoxChannel::numScales() const
00180 { return gabor(0)->getLevelSpec().maxDepth(); }
00181 
00182 // ######################################################################
00183 void SoxChannel::doInput(const InputFrame& inframe)
00184 {
00185   ASSERT(inframe.grayFloat().initialized());
00186 
00187   // compute oriented gabor pyramids in several basis directions:
00188   for (uint ii = 0; ii < numChans(); ++ii)
00189     {
00190       gabor(ii)->input(inframe);
00191       LINFO("Orientation pyramid (%d/%d) ok.", ii+1, numChans());
00192     }
00193 }
00194 
00195 // ######################################################################
00196 Image<float> SoxChannel::getLinearResponse(int ori, int scl)
00197 {
00198   const Image<float> result = gabor(ori)->getImage(scl);
00199   ASSERT(result.initialized());
00200   return result;
00201 }
00202 
00203 // ######################################################################
00204 Image<float> SoxChannel::getNonlinearResponse(int exc_ori, int exc_scl)
00205 {
00206   const Dims inp_dims = getInputDims();
00207 
00208   const Image<float> exc_img = getLinearResponse(exc_ori, exc_scl);
00209 
00210   ASSERT(exc_img.initialized());
00211 
00212   const double exc_theta = gabor(exc_ori)->angle();
00213 
00214   const double exc_frq = log(exc_img.getWidth() / double(inp_dims.w()));
00215 
00216   Image<float> inh_pool(exc_img.getDims(), NO_INIT);
00217   inh_pool.clear(semiSaturation.getVal());
00218 
00219   int kept = 0;
00220   int skipped = 0;
00221 
00222   LINFO("orientation %d/%d, scale %d/%d",
00223         exc_ori+1, numChans(),
00224         exc_scl+1, numScales());
00225 
00226   double totalFactorW = 0.0;
00227 
00228   for (uint inh_ori = 0; inh_ori < numChans(); ++inh_ori)
00229     {
00230       const double inh_theta = gabor(inh_ori)->angle();
00231 
00232       for (uint inh_scl = 0; inh_scl < numScales(); ++inh_scl)
00233         {
00234           const Image<float> inh_img = getLinearResponse(inh_ori, inh_scl);
00235 
00236           const double inh_frq = log(inh_img.getWidth()/double(inp_dims.w()));
00237 
00238           double theta_diff = exc_theta-inh_theta;
00239           if (theta_diff <= -90.0) theta_diff += 180.0;
00240           else if (theta_diff > 90.0) theta_diff -= 180.0;
00241 
00242           const double factorW =
00243             exp(-squareOf(theta_diff)/(2*squareOf(thetaPoolWidth.getVal()))
00244                 -squareOf(exc_frq-inh_frq)/(2*squareOf(omegaPoolWidth.getVal())));
00245 
00246           totalFactorW += factorW;
00247 
00248           // Performance optimization: we choose some cutoff level for
00249           // factorW (which determines how much inhibitory weights the
00250           // other orientation/scale combinations have on the current
00251           // orientation/scale); below the cutoff level, we just ignore the
00252           // other orientation/scale entirely.
00253           if (factorW < cutoff.getVal())
00254             {
00255               ++skipped;
00256             }
00257           else
00258             {
00259               ++kept;
00260 
00261               LDEBUG("factorW: (%.1f,%f,%.1f,%f) %f",
00262                      exc_theta, exc_frq, inh_theta, inh_frq, factorW);
00263 
00264               // FIXME speedup by caching the exponentiated img's
00265               inplaceAddWeighted(inh_pool,
00266                                  getInhib(inh_ori, inh_scl, exc_scl,
00267                                           inh_pool.getDims(), inh_img),
00268                                  factorW);
00269             }
00270         }
00271     }
00272 
00273   LINFO("total inh weight: %f", totalFactorW);
00274 
00275   LINFO("cutoff (%f): kept %d, skipped %d", cutoff.getVal(), kept, skipped);
00276 
00277   Image<float> result = toPower(exc_img, excitExponent.getVal()) / inh_pool;
00278 
00279   return result;
00280 }
00281 
00282 // ######################################################################
00283 Image<float> SoxChannel::combineOutputs()
00284 {
00285   Dims dims = getMapDims();
00286 
00287   Image<float> result(dims, ZEROS);
00288 
00289   for (uint exc_ori = 0; exc_ori < numChans(); ++exc_ori)
00290     {
00291       for (uint exc_scl = 0; exc_scl < numScales(); ++exc_scl)
00292         {
00293           Image<float> resp = getNonlinearResponse(exc_ori, exc_scl);
00294 
00295           result += rescale(resp, dims);
00296         }
00297     }
00298 
00299   if (itsNormType.getVal() == VCXNORM_MAXNORM)
00300     return maxNormalize(result, MAXNORMMIN, MAXNORMMAX, VCXNORM_MAXNORM);
00301   else
00302     return maxNormalize(result, 0.0f, 0.0f, itsNormType.getVal());
00303 }
00304 
00305 // ######################################################################
00306 void SoxChannel::killCaches()
00307 {
00308   ComplexChannel::killCaches(); // call our base class's version
00309 
00310   std::vector<Cache>().swap(inhibCaches);
00311 }
00312 
00313 // ######################################################################
00314 Image<float> SoxChannel::getInhib(int ori, int scl, int exc_scl,
00315                                   const Dims& dims,
00316                                   const Image<float>& linearResponse)
00317 {
00318   if (int(inhibCaches.size()) <= exc_scl)
00319     inhibCaches.resize(exc_scl+1);
00320 
00321   Cache& cache = inhibCaches[exc_scl];
00322 
00323   const LevelSpec ls = gabor(0)->getModelParamVal<LevelSpec>("LevelSpec");
00324   const unsigned int nscale = ls.maxDepth();
00325 
00326   unsigned int index = scl * nscale + ori;
00327 
00328   if (cache.size() <= index)
00329     cache.resize(index+1);
00330 
00331   if (!cache[index].img.initialized())
00332     {
00333       cache[index] =
00334         CacheElem(rescale(toPower(linearResponse, inhibExponent.getVal()),
00335                           dims),
00336                   ori, scl, exc_scl);
00337     }
00338 
00339   const CacheElem& elem = cache[index];
00340 
00341   ASSERT(elem.ori == ori);
00342   ASSERT(elem.scl == scl);
00343   ASSERT(elem.exc_scl == exc_scl);
00344 
00345   return elem.img;
00346 }
00347 
00348 // ######################################################################
00349 /* So things look consistent in everyone's emacs... */
00350 /* Local Variables: */
00351 /* indent-tabs-mode: nil */
00352 /* End: */