00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "Channels/SoxChannel.H"
00039
00040 #include "Channels/ChannelOpts.H"
00041 #include "Component/ModelOptionDef.H"
00042 #include "Component/OptionManager.H"
00043 #include "Component/ParamMap.H"
00044 #include "Image/MathOps.H"
00045 #include "Image/ShapeOps.H"
00046 #include "Image/fancynorm.H"
00047 #include "Util/Assert.H"
00048 #include "Util/MathFunctions.H"
00049 #include "Util/log.H"
00050
00051 #include <algorithm>
00052 #include <cmath>
00053
00054 static const ModelOptionDef OPT_SoxThetaPoolWidth =
00055 { MODOPT_ARG(double), "SoxThetaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE,
00056 "The width of the inhibitory pool in the orientation dimension in the "
00057 "SoxChannel (short-range orientation interactions), corresponding to "
00058 "'Sigma_theta' in section 2.7 of VisRes2005.",
00059 "sox-theta-pool-width", '\0', "<double>", "20.0" };
00060
00061 static const ModelOptionDef OPT_SoxOmegaPoolWidth =
00062 { MODOPT_ARG(double), "SoxOmegaPoolWidth", &MOC_CHANNEL, OPTEXP_CORE,
00063 "The width of the inhibitory pool in the frequency dimension in the "
00064 "SoxChannel (short-range orientation interactions), corresponding to "
00065 "'Sigma_omega' in section 2.7 of VisRes2005.",
00066 "sox-omega-pool-width", '\0', "<double>", "0.001" };
00067
00068 static const ModelOptionDef OPT_SoxInhibExponent =
00069 { MODOPT_ARG(double), "SoxInhibExponent", &MOC_CHANNEL, OPTEXP_CORE,
00070 "The exponent to which inhibitory contributions are raised in the "
00071 "SoxChannel (short-range orientation interactions), corresponding to "
00072 "'delta' section 2.7 of VisRes2005.",
00073 "sox-inhib-exponent", '\0', "<double>", "1.5" };
00074
00075 static const ModelOptionDef OPT_SoxExcitExponent =
00076 { MODOPT_ARG(double), "SoxExcitExponent", &MOC_CHANNEL, OPTEXP_CORE,
00077 "The exponent to which excitatory contributions are raised in the "
00078 "SoxChannel (short-range orientation interactions), corresponding to "
00079 "'gamma' section 2.7 of VisRes2005.",
00080 "sox-excit-exponent", '\0', "<double>", "2.0" };
00081
00082 static const ModelOptionDef OPT_SoxSemiSaturation =
00083 { MODOPT_ARG(double), "SoxSemiSaturation", &MOC_CHANNEL, OPTEXP_CORE,
00084 "The semi-saturation constant added to the divisive inhibition term "
00085 "in the SoxChannel (short-range orientation interactions), corresponding "
00086 "to 'S' section 2.7 of VisRes2005.",
00087 "sox-semi-saturation", '\0', "<double>", "1.0" };
00088
00089 static const ModelOptionDef OPT_SoxCutoff =
00090 { MODOPT_ARG(double), "SoxCutoff", &MOC_CHANNEL, OPTEXP_CORE,
00091 "As a performance optimization, in the SoxChannel (short-range "
00092 "orientation interactions), inhibitory terms will be dropped from "
00093 "further consideration if their weight ('W' in section 2.7 of "
00094 "VisRes2005) is less than this cutoff value.",
00095 "sox-cutoff", '\0', "<double>", "0.00001" };
00096
00097 static const ModelOptionDef OPT_ALIASsoxModel0037 =
00098 { MODOPT_ALIAS, "ALIASsoxModel0037", &MOC_ALIAS, OPTEXP_CORE,
00099 "Sox model #0037",
00100 "sox-model-0037", '\0', "",
00101 "--num-orient=6 "
00102 "--sox-cutoff=1e-4 "
00103 "--sox-excit-exponent=4.0 "
00104 "--sox-inhib-exponent=3.5 "
00105 "--sox-omega-pool-width=0.8 "
00106 "--sox-semi-saturation=1.0 "
00107 "--sox-theta-pool-width=20.0 "
00108 };
00109
00110
00111 SoxChannel::SoxChannel(OptionManager& mgr) :
00112 ComplexChannel(mgr, "Sox", "sox", ORI),
00113 itsNumOrients(&OPT_NumOrientations, this, 6, USE_MY_VAL),
00114 thetaPoolWidth(&OPT_SoxThetaPoolWidth, this),
00115 omegaPoolWidth(&OPT_SoxOmegaPoolWidth, this),
00116 inhibExponent(&OPT_SoxInhibExponent, this),
00117 excitExponent(&OPT_SoxExcitExponent, this),
00118 semiSaturation(&OPT_SoxSemiSaturation, this),
00119 cutoff(&OPT_SoxCutoff, this)
00120 {
00121 mgr.requestOptionAlias(&OPT_ALIASsoxModel0037);
00122
00123
00124
00125 buildSubChans();
00126 }
00127
00128
00129 void SoxChannel::buildSubChans()
00130 {
00131
00132 this->removeAllSubChans();
00133
00134
00135
00136
00137
00138 LINFO("Using %d orientations spanning [0..180]deg", itsNumOrients.getVal());
00139 for (uint ori = 0; ori < itsNumOrients.getVal(); ++ori)
00140 {
00141 nub::ref<GaborChannel> chan
00142 (makeSharedComp
00143 (new GaborChannel(getManager(),
00144 ori, 180.0 * double(ori) /
00145 double(itsNumOrients.getVal()))));
00146
00147 chan->setComputeFullPyramid(true);
00148
00149 this->addSubChan(chan);
00150
00151
00152 chan->exportOptions(MC_RECURSE);
00153 }
00154 }
00155
00156
00157 void SoxChannel::paramChanged(ModelParamBase* const param,
00158 const bool valueChanged,
00159 ParamClient::ChangeStatus* status)
00160 {
00161 ComplexChannel::paramChanged(param, valueChanged, status);
00162
00163
00164
00165 if (param == &itsNumOrients &&
00166 numChans() != itsNumOrients.getVal())
00167 buildSubChans();
00168 }
00169
00170
00171 nub::ref<GaborChannel> SoxChannel::gabor(const uint idx) const
00172 { return dynCast<GaborChannel>(subChan(idx)); }
00173
00174
00175 SoxChannel::~SoxChannel()
00176 { }
00177
00178
00179 uint SoxChannel::numScales() const
00180 { return gabor(0)->getLevelSpec().maxDepth(); }
00181
00182
00183 void SoxChannel::doInput(const InputFrame& inframe)
00184 {
00185 ASSERT(inframe.grayFloat().initialized());
00186
00187
00188 for (uint ii = 0; ii < numChans(); ++ii)
00189 {
00190 gabor(ii)->input(inframe);
00191 LINFO("Orientation pyramid (%d/%d) ok.", ii+1, numChans());
00192 }
00193 }
00194
00195
00196 Image<float> SoxChannel::getLinearResponse(int ori, int scl)
00197 {
00198 const Image<float> result = gabor(ori)->getImage(scl);
00199 ASSERT(result.initialized());
00200 return result;
00201 }
00202
00203
00204 Image<float> SoxChannel::getNonlinearResponse(int exc_ori, int exc_scl)
00205 {
00206 const Dims inp_dims = getInputDims();
00207
00208 const Image<float> exc_img = getLinearResponse(exc_ori, exc_scl);
00209
00210 ASSERT(exc_img.initialized());
00211
00212 const double exc_theta = gabor(exc_ori)->angle();
00213
00214 const double exc_frq = log(exc_img.getWidth() / double(inp_dims.w()));
00215
00216 Image<float> inh_pool(exc_img.getDims(), NO_INIT);
00217 inh_pool.clear(semiSaturation.getVal());
00218
00219 int kept = 0;
00220 int skipped = 0;
00221
00222 LINFO("orientation %d/%d, scale %d/%d",
00223 exc_ori+1, numChans(),
00224 exc_scl+1, numScales());
00225
00226 double totalFactorW = 0.0;
00227
00228 for (uint inh_ori = 0; inh_ori < numChans(); ++inh_ori)
00229 {
00230 const double inh_theta = gabor(inh_ori)->angle();
00231
00232 for (uint inh_scl = 0; inh_scl < numScales(); ++inh_scl)
00233 {
00234 const Image<float> inh_img = getLinearResponse(inh_ori, inh_scl);
00235
00236 const double inh_frq = log(inh_img.getWidth()/double(inp_dims.w()));
00237
00238 double theta_diff = exc_theta-inh_theta;
00239 if (theta_diff <= -90.0) theta_diff += 180.0;
00240 else if (theta_diff > 90.0) theta_diff -= 180.0;
00241
00242 const double factorW =
00243 exp(-squareOf(theta_diff)/(2*squareOf(thetaPoolWidth.getVal()))
00244 -squareOf(exc_frq-inh_frq)/(2*squareOf(omegaPoolWidth.getVal())));
00245
00246 totalFactorW += factorW;
00247
00248
00249
00250
00251
00252
00253 if (factorW < cutoff.getVal())
00254 {
00255 ++skipped;
00256 }
00257 else
00258 {
00259 ++kept;
00260
00261 LDEBUG("factorW: (%.1f,%f,%.1f,%f) %f",
00262 exc_theta, exc_frq, inh_theta, inh_frq, factorW);
00263
00264
00265 inplaceAddWeighted(inh_pool,
00266 getInhib(inh_ori, inh_scl, exc_scl,
00267 inh_pool.getDims(), inh_img),
00268 factorW);
00269 }
00270 }
00271 }
00272
00273 LINFO("total inh weight: %f", totalFactorW);
00274
00275 LINFO("cutoff (%f): kept %d, skipped %d", cutoff.getVal(), kept, skipped);
00276
00277 Image<float> result = toPower(exc_img, excitExponent.getVal()) / inh_pool;
00278
00279 return result;
00280 }
00281
00282
00283 Image<float> SoxChannel::combineOutputs()
00284 {
00285 Dims dims = getMapDims();
00286
00287 Image<float> result(dims, ZEROS);
00288
00289 for (uint exc_ori = 0; exc_ori < numChans(); ++exc_ori)
00290 {
00291 for (uint exc_scl = 0; exc_scl < numScales(); ++exc_scl)
00292 {
00293 Image<float> resp = getNonlinearResponse(exc_ori, exc_scl);
00294
00295 result += rescale(resp, dims);
00296 }
00297 }
00298
00299 if (itsNormType.getVal() == VCXNORM_MAXNORM)
00300 return maxNormalize(result, MAXNORMMIN, MAXNORMMAX, VCXNORM_MAXNORM);
00301 else
00302 return maxNormalize(result, 0.0f, 0.0f, itsNormType.getVal());
00303 }
00304
00305
00306 void SoxChannel::killCaches()
00307 {
00308 ComplexChannel::killCaches();
00309
00310 std::vector<Cache>().swap(inhibCaches);
00311 }
00312
00313
00314 Image<float> SoxChannel::getInhib(int ori, int scl, int exc_scl,
00315 const Dims& dims,
00316 const Image<float>& linearResponse)
00317 {
00318 if (int(inhibCaches.size()) <= exc_scl)
00319 inhibCaches.resize(exc_scl+1);
00320
00321 Cache& cache = inhibCaches[exc_scl];
00322
00323 const LevelSpec ls = gabor(0)->getModelParamVal<LevelSpec>("LevelSpec");
00324 const unsigned int nscale = ls.maxDepth();
00325
00326 unsigned int index = scl * nscale + ori;
00327
00328 if (cache.size() <= index)
00329 cache.resize(index+1);
00330
00331 if (!cache[index].img.initialized())
00332 {
00333 cache[index] =
00334 CacheElem(rescale(toPower(linearResponse, inhibExponent.getVal()),
00335 dims),
00336 ori, scl, exc_scl);
00337 }
00338
00339 const CacheElem& elem = cache[index];
00340
00341 ASSERT(elem.ori == ori);
00342 ASSERT(elem.scl == scl);
00343 ASSERT(elem.exc_scl == exc_scl);
00344
00345 return elem.img;
00346 }
00347
00348
00349
00350
00351
00352