00001 /*!@file Channels/SoxChannel.C Shortrange Orientation Interactions channel */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters@klab.caltech.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Channels/SoxChannel.C $ 00035 // $Id: SoxChannel.C 8857 2007-10-18 23:38:04Z rjpeters $ 00036 // 00037 00038 #include "Channels/SoxChannel.H" 00039 00040 #include "Channels/ChannelOpts.H" 00041 #include "Component/ModelOptionDef.H" 00042 #include "Component/OptionManager.H" 00043 #include "Component/ParamMap.H" 00044 #include "Image/MathOps.H" // for toPower 00045 #include "Image/ShapeOps.H" // for rescale 00046 #include "Image/fancynorm.H" 00047 #include "Util/Assert.H" 00048 #include "Util/MathFunctions.H" 00049 #include "Util/log.H" 00050 00051 #include <algorithm> // for std::swap 00052 #include <cmath> // for exp 00053 00054 static const ModelOptionDef OPT_SoxThetaPoolWidth = 00055 { MODOPT_ARG(double), "SoxThetaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE, 00056 "The width of the inhibitory pool in the orientation dimension in the " 00057 "SoxChannel (short-range orientation interactions), corresponding to " 00058 "'Sigma_theta' in section 2.7 of VisRes2005.", 00059 "sox-theta-pool-width", '\0', "<double>", "20.0" }; 00060 00061 static const ModelOptionDef OPT_SoxOmegaPoolWidth = 00062 { MODOPT_ARG(double), "SoxOmegaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE, 00063 "The width of the inhibitory pool in the frequency dimension in the " 00064 "SoxChannel (short-range orientation interactions), corresponding to " 00065 "'Sigma_omega' in section 2.7 of VisRes2005.", 00066 "sox-omega-pool-width", '\0', "<double>", "0.001" }; 00067 00068 static const ModelOptionDef OPT_SoxInhibExponent = 00069 { MODOPT_ARG(double), "SoxInhibExponent", &MOC_CHANNEL, OPTEXP_CORE, 00070 "The exponent to which inhibitory contributions are raised in the " 00071 "SoxChannel (short-range orientation interactions), corresponding to " 00072 "'delta' section 2.7 of VisRes2005.", 00073 "sox-inhib-exponent", '\0', "<double>", "1.5" }; 00074 00075 static const ModelOptionDef OPT_SoxExcitExponent = 00076 { MODOPT_ARG(double), "SoxExcitExponent", &MOC_CHANNEL, OPTEXP_CORE, 00077 "The exponent to which excitatory contributions are raised in the " 00078 "SoxChannel (short-range orientation interactions), corresponding to " 00079 "'gamma' section 2.7 of VisRes2005.", 00080 "sox-excit-exponent", '\0', "<double>", "2.0" }; 00081 00082 static const ModelOptionDef OPT_SoxSemiSaturation = 00083 { MODOPT_ARG(double), "SoxSemiSaturation", &MOC_CHANNEL, OPTEXP_CORE, 00084 "The semi-saturation constant added to the divisive inhibition term " 00085 "in the SoxChannel (short-range orientation interactions), corresponding " 00086 "to 'S' section 2.7 of VisRes2005.", 00087 "sox-semi-saturation", '\0', "<double>", "1.0" }; 00088 00089 static const ModelOptionDef OPT_SoxCutoff = 00090 { MODOPT_ARG(double), "SoxCutoff", &MOC_CHANNEL, OPTEXP_CORE, 00091 "As a performance optimization, in the SoxChannel (short-range " 00092 "orientation interactions), inhibitory terms will be dropped from " 00093 "further consideration if their weight ('W' in section 2.7 of " 00094 "VisRes2005) is less than this cutoff value.", 00095 "sox-cutoff", '\0', "<double>", "0.00001" }; 00096 00097 static const ModelOptionDef OPT_ALIASsoxModel0037 = 00098 { MODOPT_ALIAS, "ALIASsoxModel0037", &MOC_ALIAS, OPTEXP_CORE, 00099 "Sox model #0037", 00100 "sox-model-0037", '\0', "", 00101 "--num-orient=6 " 00102 "--sox-cutoff=1e-4 " 00103 "--sox-excit-exponent=4.0 " 00104 "--sox-inhib-exponent=3.5 " 00105 "--sox-omega-pool-width=0.8 " 00106 "--sox-semi-saturation=1.0 " 00107 "--sox-theta-pool-width=20.0 " 00108 }; 00109 00110 // ###################################################################### 00111 SoxChannel::SoxChannel(OptionManager& mgr) : 00112 ComplexChannel(mgr, "Sox", "sox", ORI), 00113 itsNumOrients(&OPT_NumOrientations, this, 6, USE_MY_VAL), 00114 thetaPoolWidth(&OPT_SoxThetaPoolWidth, this), 00115 omegaPoolWidth(&OPT_SoxOmegaPoolWidth, this), 00116 inhibExponent(&OPT_SoxInhibExponent, this), 00117 excitExponent(&OPT_SoxExcitExponent, this), 00118 semiSaturation(&OPT_SoxSemiSaturation, this), 00119 cutoff(&OPT_SoxCutoff, this) 00120 { 00121 mgr.requestOptionAlias(&OPT_ALIASsoxModel0037); 00122 00123 // let's build our channels; we may have to re-build them if 00124 // itsNumOrient get changed on us before we start(): 00125 buildSubChans(); 00126 } 00127 00128 // ###################################################################### 00129 void SoxChannel::buildSubChans() 00130 { 00131 // kill any subchans we may have had... 00132 this->removeAllSubChans(); 00133 00134 // let's instantiate our Gabor subchannels now that we know how many 00135 // we want. They will inherit the current values (typically 00136 // post-command-line parsing) of all their options as they are 00137 // constructed: 00138 LINFO("Using %d orientations spanning [0..180]deg", itsNumOrients.getVal()); 00139 for (uint ori = 0; ori < itsNumOrients.getVal(); ++ori) 00140 { 00141 nub::ref<GaborChannel> chan 00142 (makeSharedComp 00143 (new GaborChannel(getManager(), 00144 ori, 180.0 * double(ori) / 00145 double(itsNumOrients.getVal())))); 00146 00147 chan->setComputeFullPyramid(true); 00148 00149 this->addSubChan(chan); 00150 00151 // let's export options on the newly built channel: 00152 chan->exportOptions(MC_RECURSE); 00153 } 00154 } 00155 00156 // ###################################################################### 00157 void SoxChannel::paramChanged(ModelParamBase* const param, 00158 const bool valueChanged, 00159 ParamClient::ChangeStatus* status) 00160 { 00161 ComplexChannel::paramChanged(param, valueChanged, status); 00162 00163 // if the param is our number of orientations and it has become 00164 // different from our number of channels, let's reconfigure: 00165 if (param == &itsNumOrients && 00166 numChans() != itsNumOrients.getVal()) 00167 buildSubChans(); 00168 } 00169 00170 // ###################################################################### 00171 nub::ref<GaborChannel> SoxChannel::gabor(const uint idx) const 00172 { return dynCast<GaborChannel>(subChan(idx)); } 00173 00174 // ###################################################################### 00175 SoxChannel::~SoxChannel() 00176 { } 00177 00178 // ###################################################################### 00179 uint SoxChannel::numScales() const 00180 { return gabor(0)->getLevelSpec().maxDepth(); } 00181 00182 // ###################################################################### 00183 void SoxChannel::doInput(const InputFrame& inframe) 00184 { 00185 ASSERT(inframe.grayFloat().initialized()); 00186 00187 // compute oriented gabor pyramids in several basis directions: 00188 for (uint ii = 0; ii < numChans(); ++ii) 00189 { 00190 gabor(ii)->input(inframe); 00191 LINFO("Orientation pyramid (%d/%d) ok.", ii+1, numChans()); 00192 } 00193 } 00194 00195 // ###################################################################### 00196 Image<float> SoxChannel::getLinearResponse(int ori, int scl) 00197 { 00198 const Image<float> result = gabor(ori)->getImage(scl); 00199 ASSERT(result.initialized()); 00200 return result; 00201 } 00202 00203 // ###################################################################### 00204 Image<float> SoxChannel::getNonlinearResponse(int exc_ori, int exc_scl) 00205 { 00206 const Dims inp_dims = getInputDims(); 00207 00208 const Image<float> exc_img = getLinearResponse(exc_ori, exc_scl); 00209 00210 ASSERT(exc_img.initialized()); 00211 00212 const double exc_theta = gabor(exc_ori)->angle(); 00213 00214 const double exc_frq = log(exc_img.getWidth() / double(inp_dims.w())); 00215 00216 Image<float> inh_pool(exc_img.getDims(), NO_INIT); 00217 inh_pool.clear(semiSaturation.getVal()); 00218 00219 int kept = 0; 00220 int skipped = 0; 00221 00222 LINFO("orientation %d/%d, scale %d/%d", 00223 exc_ori+1, numChans(), 00224 exc_scl+1, numScales()); 00225 00226 double totalFactorW = 0.0; 00227 00228 for (uint inh_ori = 0; inh_ori < numChans(); ++inh_ori) 00229 { 00230 const double inh_theta = gabor(inh_ori)->angle(); 00231 00232 for (uint inh_scl = 0; inh_scl < numScales(); ++inh_scl) 00233 { 00234 const Image<float> inh_img = getLinearResponse(inh_ori, inh_scl); 00235 00236 const double inh_frq = log(inh_img.getWidth()/double(inp_dims.w())); 00237 00238 double theta_diff = exc_theta-inh_theta; 00239 if (theta_diff <= -90.0) theta_diff += 180.0; 00240 else if (theta_diff > 90.0) theta_diff -= 180.0; 00241 00242 const double factorW = 00243 exp(-squareOf(theta_diff)/(2*squareOf(thetaPoolWidth.getVal())) 00244 -squareOf(exc_frq-inh_frq)/(2*squareOf(omegaPoolWidth.getVal()))); 00245 00246 totalFactorW += factorW; 00247 00248 // Performance optimization: we choose some cutoff level for 00249 // factorW (which determines how much inhibitory weights the 00250 // other orientation/scale combinations have on the current 00251 // orientation/scale); below the cutoff level, we just ignore the 00252 // other orientation/scale entirely. 00253 if (factorW < cutoff.getVal()) 00254 { 00255 ++skipped; 00256 } 00257 else 00258 { 00259 ++kept; 00260 00261 LDEBUG("factorW: (%.1f,%f,%.1f,%f) %f", 00262 exc_theta, exc_frq, inh_theta, inh_frq, factorW); 00263 00264 // FIXME speedup by caching the exponentiated img's 00265 inplaceAddWeighted(inh_pool, 00266 getInhib(inh_ori, inh_scl, exc_scl, 00267 inh_pool.getDims(), inh_img), 00268 factorW); 00269 } 00270 } 00271 } 00272 00273 LINFO("total inh weight: %f", totalFactorW); 00274 00275 LINFO("cutoff (%f): kept %d, skipped %d", cutoff.getVal(), kept, skipped); 00276 00277 Image<float> result = toPower(exc_img, excitExponent.getVal()) / inh_pool; 00278 00279 return result; 00280 } 00281 00282 // ###################################################################### 00283 Image<float> SoxChannel::combineOutputs() 00284 { 00285 Dims dims = getMapDims(); 00286 00287 Image<float> result(dims, ZEROS); 00288 00289 for (uint exc_ori = 0; exc_ori < numChans(); ++exc_ori) 00290 { 00291 for (uint exc_scl = 0; exc_scl < numScales(); ++exc_scl) 00292 { 00293 Image<float> resp = getNonlinearResponse(exc_ori, exc_scl); 00294 00295 result += rescale(resp, dims); 00296 } 00297 } 00298 00299 if (itsNormType.getVal() == VCXNORM_MAXNORM) 00300 return maxNormalize(result, MAXNORMMIN, MAXNORMMAX, VCXNORM_MAXNORM); 00301 else 00302 return maxNormalize(result, 0.0f, 0.0f, itsNormType.getVal()); 00303 } 00304 00305 // ###################################################################### 00306 void SoxChannel::killCaches() 00307 { 00308 ComplexChannel::killCaches(); // call our base class's version 00309 00310 std::vector<Cache>().swap(inhibCaches); 00311 } 00312 00313 // ###################################################################### 00314 Image<float> SoxChannel::getInhib(int ori, int scl, int exc_scl, 00315 const Dims& dims, 00316 const Image<float>& linearResponse) 00317 { 00318 if (int(inhibCaches.size()) <= exc_scl) 00319 inhibCaches.resize(exc_scl+1); 00320 00321 Cache& cache = inhibCaches[exc_scl]; 00322 00323 const LevelSpec ls = gabor(0)->getModelParamVal<LevelSpec>("LevelSpec"); 00324 const unsigned int nscale = ls.maxDepth(); 00325 00326 unsigned int index = scl * nscale + ori; 00327 00328 if (cache.size() <= index) 00329 cache.resize(index+1); 00330 00331 if (!cache[index].img.initialized()) 00332 { 00333 cache[index] = 00334 CacheElem(rescale(toPower(linearResponse, inhibExponent.getVal()), 00335 dims), 00336 ori, scl, exc_scl); 00337 } 00338 00339 const CacheElem& elem = cache[index]; 00340 00341 ASSERT(elem.ori == ori); 00342 ASSERT(elem.scl == scl); 00343 ASSERT(elem.exc_scl == exc_scl); 00344 00345 return elem.img; 00346 } 00347 00348 // ###################################################################### 00349 /* So things look consistent in everyone's emacs... */ 00350 /* Local Variables: */ 00351 /* indent-tabs-mode: nil */ 00352 /* End: */