00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef CHANNELS_SPECTRALRESIDUALCHANNEL_C_DEFINED
00039 #define CHANNELS_SPECTRALRESIDUALCHANNEL_C_DEFINED
00040
00041 #include "Channels/SpectralResidualChannel.H"
00042
00043 #include "Channels/ChannelOpts.H"
00044 #include "Channels/ChannelVisitor.H"
00045 #include "Component/ModelOptionDef.H"
00046 #include "Image/FilterOps.H"
00047 #include "Image/FourierEngine.H"
00048 #include "Image/Kernels.H"
00049 #include "Image/MathOps.H"
00050 #include "Image/ShapeOps.H"
00051 #include "Transport/FrameInfo.H"
00052 #include "Transport/FrameOstream.H"
00053 #include "rutz/trace.h"
00054
00055
00056 const ModelOptionDef OPT_SpectralResidualChannelSaveOutputMap =
00057 { MODOPT_FLAG, "SpectralResidualChannelSaveOutputMap", &MOC_CHANNEL, OPTEXP_SAVE,
00058 "Save output maps from the Spectral Residual channel (\"SRS\")",
00059 "save-srs-output", '\0', "", "false" };
00060
00061
00062 const ModelOptionDef OPT_SpectralResidualChannelSaveExtraOutput =
00063 { MODOPT_FLAG, "SpectralResidualChannelSaveExtraOutput", &MOC_CHANNEL, OPTEXP_SAVE,
00064 "Save additional output maps from the Spectral Residual channel",
00065 "save-srs-extra-output", '\0', "", "false" };
00066
00067
00068 const ModelOptionDef OPT_SpectralResidualChannelResizeSpec =
00069 { MODOPT_ARG(ResizeSpec), "SpectralResidualChannelResizeSpec", &MOC_CHANNEL, OPTEXP_CORE,
00070 "Specification for how the spectral residual channel should "
00071 "resize the input prior to the fft and subsequent operations. ",
00072 "srs-resize", '\0', "<None | WxH | *WFxHF | /WFxHF>", "64x64" };
00073
00074
00075 const ModelOptionDef OPT_SpectralResidualChannelSpectralBlur =
00076 { MODOPT_ARG(uint), "SpectralResidualChannelSpectralBlur", &MOC_CHANNEL, OPTEXP_CORE,
00077 "Size (in pixels) of the blur filter applied to the FFT "
00078 "log-magnitude image to produce the spectral residual image",
00079 "srs-spectral-blur", '\0', "<uint>", "3" };
00080
00081
00082 const ModelOptionDef OPT_SpectralResidualChannelOutputBlur =
00083 { MODOPT_ARG(float), "SpectralResidualChannelOutputBlur", &MOC_CHANNEL, OPTEXP_CORE,
00084 "Standard deviation of the Gaussian blur filter applied to the "
00085 "Spectral Residual channel output",
00086 "srs-output-blur", '\0', "<float>", "3.5" };
00087
00088
00089 const ModelOptionDef OPT_SpectralResidualChannelOutputBlurFactor =
00090 { MODOPT_ARG(double), "SpectralResidualOutputChannelBlurFactor", &MOC_CHANNEL, OPTEXP_CORE,
00091 "If non-zero, then this option overrides any value set with "
00092 "--srs-output-blur, such that the output blur filter width&height "
00093 "is set to this factor times the width&height of the channel's "
00094 "resized input.",
00095 "srs-output-blur-factor", '\0', "<double>", "0.0" };
00096
00097
00098 const ModelOptionDef OPT_SpectralResidualChannelDownSizeFilterWidth =
00099 { MODOPT_ARG(int), "SpectralResidualChannelDownSizeFilterWidth", &MOC_CHANNEL, OPTEXP_CORE,
00100 "Low-pass filter width used when downsizing input prior to "
00101 "processing in the Spectral Residual channel",
00102 "srs-lowpass-filter-width", '\0', "<int>", "8" };
00103
00104
00105 const ModelOptionDef OPT_SpectralResidualChannelAttenuationWidth =
00106 { MODOPT_ARG(double), "SpectralResidualChannelAttenuationWidth", &MOC_CHANNEL, OPTEXP_CORE,
00107 "Width across which the borders of the Spectral Residual output "
00108 "should be attenuated, expressed as a proportion of the output "
00109 "image size.",
00110 "srs-attenuation-width", '\0', "<double>", "0.0" };
00111
00112
00113 const ModelOptionDef OPT_SpectralResidualChannelOutputResize =
00114 { MODOPT_FLAG, "SpectralResidualChannelOutputResize", &MOC_CHANNEL, OPTEXP_CORE,
00115 "Whether or not to do any output resizing; if yes, then choose output "
00116 "dims according to --srs-output-resize-spec.",
00117 "srs-output-resize", '\0', "", "false" };
00118
00119
00120 const ModelOptionDef OPT_SpectralResidualChannelOutputResizeSpec =
00121 { MODOPT_ARG(ResizeSpec), "SpectralResidualChannelOutputResizeSpec", &MOC_CHANNEL, OPTEXP_CORE,
00122 "Specification for how the spectral residual channel should "
00123 "resize its output, relative to the original input size.",
00124 "srs-output-resize-spec", '\0', "<None | WxH | *WFxHF | /WFxHF>", "None" };
00125
00126
00127 const ModelOptionDef OPT_SpectralResidualChannelHiboostBypass =
00128 { MODOPT_FLAG, "SpectralResidualChannelHiboostBypass", &MOC_CHANNEL, OPTEXP_CORE,
00129 "Whether to bypass spectral-residual computations with a simple "
00130 "high-frequency boost instead.",
00131 "srs-hiboost-bypass", '\0', "", "false" };
00132
00133
00134 const ModelOptionDef OPT_SpectralResidualChannelGradientBypass =
00135 { MODOPT_FLAG, "SpectralResidualChannelGradientBypass", &MOC_CHANNEL, OPTEXP_CORE,
00136 "Whether to bypass spectral-residual computations with a simple "
00137 "gradient magnitude computation instead.",
00138 "srs-gradient-bypass", '\0', "", "false" };
00139
00140
00141 static Image<complexd> joinLogampliPhase(const Image<float>& logampli,
00142 const Image<double>& phase)
00143 {
00144 GVX_TRACE("SpectralResidualChannel::joinLogampliPhase");
00145
00146 ASSERT(logampli.getDims() == phase.getDims());
00147
00148 Image<complexd> newFFT(phase.getDims(), NO_INIT);
00149
00150 const int size = newFFT.getSize();
00151
00152 Image<complexd>::iterator const dptr = newFFT.beginw();
00153 Image<float>::const_iterator const aptr = logampli.begin();
00154 Image<double>::const_iterator const pptr = phase.begin();
00155
00156
00157
00158 const double div = 1.0 / sqrt(size);
00159
00160 for (int i = 0; i < size; ++i)
00161 dptr[i] = std::exp(complexd(aptr[i], pptr[i])) * div;
00162
00163 return newFFT;
00164 }
00165
00166
00167 SpectralResidualChannel::Downsizer::Downsizer()
00168 :
00169 itsInput(), itsFilterWidth(-1), itsPyr()
00170 {
00171 if (0 != pthread_mutex_init(&itsMutex, NULL))
00172 PLFATAL("pthread_mutex_init() failed");
00173 }
00174
00175
00176 SpectralResidualChannel::Downsizer::~Downsizer()
00177 {
00178 if (0 != pthread_mutex_destroy(&itsMutex))
00179 PLERROR("pthread_mutex_destroy() failed");
00180 }
00181
00182
00183 Image<float> SpectralResidualChannel::Downsizer::
00184 getDownsized(const Image<float>& x, int filtwidth, const Dims& newdims)
00185 {
00186 GVX_TRACE("SRS::Downsizer::getDownsized");
00187
00188 GVX_MUTEX_LOCK(&itsMutex);
00189
00190 if (!itsInput.hasSameData(x) || filtwidth != itsFilterWidth)
00191 {
00192 itsInput = x;
00193 itsFilterWidth = filtwidth;
00194 itsPyr.resize(0);
00195 itsPyr.push_back(x);
00196 LINFO("initializing downsize cache");
00197 }
00198 else
00199 {
00200 LINFO("reusing downsize cache");
00201 }
00202
00203 while (itsPyr.back().getWidth() > newdims.w() * 2
00204 && itsPyr.back().getHeight() > newdims.h() * 2)
00205 {
00206 Image<float> nextlev = itsPyr.back();
00207 if (itsFilterWidth == 1)
00208 {
00209 nextlev = decX(nextlev);
00210 nextlev = decY(nextlev);
00211 }
00212 else if (itsFilterWidth == 2)
00213 {
00214 nextlev = quickLocalAvg2x2(nextlev);
00215 }
00216 else
00217 {
00218 nextlev = decX(lowPassX(itsFilterWidth, nextlev));
00219 nextlev = decY(lowPassY(itsFilterWidth, nextlev));
00220 }
00221 itsPyr.push_back(nextlev);
00222 }
00223
00224 size_t pyrlev = 0;
00225
00226 while (itsPyr[pyrlev].getWidth() > newdims.w() * 2
00227 && itsPyr[pyrlev].getHeight() > newdims.h() * 2)
00228 {
00229 ASSERT(pyrlev + 1 < itsPyr.size());
00230 ++pyrlev;
00231 }
00232
00233 ASSERT(pyrlev < itsPyr.size());
00234 ASSERT(pyrlev == 0 || itsPyr[pyrlev].getWidth() >= newdims.w());
00235 ASSERT(pyrlev == 0 || itsPyr[pyrlev].getHeight() >= newdims.h());
00236 ASSERT(itsPyr[pyrlev].getWidth() <= newdims.w() * 2
00237 || itsPyr[pyrlev].getHeight() <= newdims.h() * 2);
00238
00239 return rescaleBilinear(itsPyr[pyrlev], newdims);
00240 }
00241
00242
00243 SpectralResidualChannel::SpectralResidualChannel
00244 (OptionManager& mgr,
00245 const std::string& descrName,
00246 const std::string& tagName)
00247 :
00248 ChannelBase(mgr, descrName, tagName, UNKNOWN),
00249 itsSaveOutput(&OPT_SpectralResidualChannelSaveOutputMap, this),
00250 itsSaveExtraOutput(&OPT_SpectralResidualChannelSaveExtraOutput, this),
00251 itsResizeSpec(&OPT_SpectralResidualChannelResizeSpec, this),
00252 itsSpectralBlur(&OPT_SpectralResidualChannelSpectralBlur, this),
00253 itsOutputBlur(&OPT_SpectralResidualChannelOutputBlur, this),
00254 itsOutputBlurFactor(&OPT_SpectralResidualChannelOutputBlurFactor, this),
00255 itsDownSizeFilterWidth(&OPT_SpectralResidualChannelDownSizeFilterWidth, this),
00256 itsAttenuationWidth(&OPT_SpectralResidualChannelAttenuationWidth, this),
00257 itsDoResizeOutput(&OPT_SpectralResidualChannelOutputResize, this),
00258 itsOutputResizeSpec(&OPT_SpectralResidualChannelOutputResizeSpec, this),
00259 itsNormType(&OPT_MaxNormType, this),
00260 itsOutputRangeMin(&OPT_ChannelOutputRangeMin, this),
00261 itsOutputRangeMax(&OPT_ChannelOutputRangeMax, this),
00262 itsHiboostBypass(&OPT_SpectralResidualChannelHiboostBypass, this),
00263 itsGradientBypass(&OPT_SpectralResidualChannelGradientBypass, this),
00264 itsDownsizer(new Downsizer),
00265 itsFFT(0),
00266 itsIFFT(0),
00267 itsInput(),
00268 itsOutput()
00269 {
00270 }
00271
00272
00273 SpectralResidualChannel::~SpectralResidualChannel()
00274 {
00275 delete itsFFT;
00276 delete itsIFFT;
00277 }
00278
00279
00280 void SpectralResidualChannel::accept(ChannelVisitor& v)
00281 {
00282 v.visitChannelBase(*this);
00283 }
00284
00285
00286 bool SpectralResidualChannel::isHomogeneous() const
00287 {
00288 return true;
00289 }
00290
00291
00292 void SpectralResidualChannel::readFrom(const ParamMap& pmap)
00293 {
00294 ChannelBase::readFrom(pmap);
00295 }
00296
00297
00298 void SpectralResidualChannel::writeTo(ParamMap& pmap) const
00299 {
00300 ChannelBase::writeTo(pmap);
00301 }
00302
00303
00304 bool SpectralResidualChannel::outputAvailable() const
00305 {
00306 return itsInput.initialized();
00307 }
00308
00309
00310 Dims SpectralResidualChannel::getMapDims() const
00311 {
00312 return
00313 itsDoResizeOutput.getVal()
00314 ? itsOutputResizeSpec.getVal().transformDims(this->getInputDims())
00315 : itsResizeSpec.getVal().transformDims(this->getInputDims());
00316 }
00317
00318
00319 uint SpectralResidualChannel::numSubmaps() const
00320 {
00321 return 1;
00322 }
00323
00324
00325 Image<float> SpectralResidualChannel::getSubmap(const uint index) const
00326 {
00327 if (index == 0)
00328 return const_cast<SpectralResidualChannel*>(this)->getOutput();
00329
00330 LFATAL("submap index %u out of range; I have only %u submap(s)",
00331 index, this->numSubmaps());
00332
00333 return Image<float>();
00334 }
00335
00336
00337 std::string SpectralResidualChannel::getSubmapName(const uint index) const
00338 {
00339 if (index == 0) return "SpectralResidual";
00340
00341 LFATAL("submap index %u out of range; I have only %u submap(s)",
00342 index, this->numSubmaps());
00343
00344 return std::string();
00345 }
00346
00347
00348 std::string SpectralResidualChannel::getSubmapNameShort(const uint index) const
00349 {
00350 if (index == 0) return "SpecRes";
00351
00352 LFATAL("submap index %u out of range; I have only %u submap(s)",
00353 index, this->numSubmaps());
00354
00355 return std::string();
00356 }
00357
00358
00359 void SpectralResidualChannel::getFeatures(const Point2D<int>& locn,
00360 std::vector<float>& mean) const
00361 {
00362 LFATAL("not implemented");
00363 }
00364
00365
00366 void SpectralResidualChannel::getFeaturesBatch(std::vector<Point2D<int>*> *locn,
00367 std::vector<std::vector<float> > *mean,
00368 int *count) const
00369 {
00370 LFATAL("not implemented");
00371 }
00372
00373
00374 Image<float> SpectralResidualChannel::getOutput()
00375 {
00376 GVX_TRACE("SRS::getOutput");
00377
00378 if (!itsInput.initialized())
00379 LFATAL("I have no input yet!");
00380
00381 if (!itsOutput.initialized())
00382 {
00383 const Dims newdims =
00384 itsResizeSpec.getVal().transformDims(itsInput.getDims());
00385
00386 itsRescaledInput =
00387 itsDownsizer->getDownsized(itsInput,
00388 itsDownSizeFilterWidth.getVal(),
00389 newdims);
00390
00391 if (itsFFT == 0)
00392 itsFFT = new FourierEngine<double>(itsRescaledInput.getDims());
00393
00394 if (itsIFFT == 0)
00395 itsIFFT = new FourierInvEngine<double>(itsRescaledInput.getDims());
00396
00397 if (itsGradientBypass.getVal())
00398 {
00399 if (itsHiboostBypass.getVal())
00400 LFATAL("can't use both --%s and --%s",
00401 itsHiboostBypass.getOptionDef()->longoptname,
00402 itsGradientBypass.getOptionDef()->longoptname);
00403
00404 itsProtoSaliencyMap = gradientmag(itsRescaledInput);
00405 }
00406 else
00407 {
00408 const Image<complexd> myFFT = itsFFT->fft(itsRescaledInput);
00409 itsLogMagnitude = Image<float>(logmagnitude(myFFT));
00410 itsPhase = phase(myFFT);
00411
00412 const int size = myFFT.getSize();
00413
00414 if (itsHiboostBypass.getVal())
00415 {
00416 if (itsGradientBypass.getVal())
00417 LFATAL("can't use both --%s and --%s",
00418 itsHiboostBypass.getOptionDef()->longoptname,
00419 itsGradientBypass.getOptionDef()->longoptname);
00420
00421
00422
00423
00424 itsSpectralResidual.resize(itsLogMagnitude.getDims());
00425
00426 const int w = itsLogMagnitude.getWidth();
00427 const int h = itsLogMagnitude.getHeight();
00428
00429 Image<float>::const_iterator sptr = itsLogMagnitude.begin();
00430 Image<float>::iterator dptr = itsSpectralResidual.beginw();
00431
00432 const double logsize = log(size);
00433
00434 for (int y = 0; y < h; ++y)
00435 for (int x = 0; x < w; ++x)
00436 {
00437 const int yf = std::min(y, h-y);
00438
00439 const double fsq = x*x + yf*yf;
00440
00441 *dptr++ = (*sptr++) + 0.5 * log(fsq) - logsize;
00442 }
00443 }
00444 else if (itsSpectralBlur.getVal() < 1)
00445 {
00446 LFATAL("--%s must be >= 1",
00447 itsSpectralBlur.getOptionDef()->longoptname);
00448 }
00449 else if (itsSpectralBlur.getVal() == 1)
00450 {
00451 itsSpectralResidual.resize(itsLogMagnitude.getDims(), true);
00452 }
00453 else
00454 {
00455 Image<float> filt(itsSpectralBlur.getVal(), 1, ZEROS);
00456 filt.clear(1.0f / itsSpectralBlur.getVal());
00457
00458 itsSpectralResidual =
00459 itsLogMagnitude - sepFilter(itsLogMagnitude, filt, filt,
00460 CONV_BOUNDARY_REPLICATE);
00461 }
00462
00463 const Image<complexd> newFFT = joinLogampliPhase(itsSpectralResidual, itsPhase);
00464
00465 itsProtoSaliencyMap = itsIFFT->ifft(newFFT);
00466 }
00467
00468 if (MYLOGVERB >= LOG_DEBUG)
00469 {
00470 float mi1, ma1; getMinMax(itsRescaledInput, mi1, ma1);
00471 float mi, ma; getMinMax(itsProtoSaliencyMap, mi, ma);
00472 LDEBUG("input range %f .. %f; proto range %f .. %f",
00473 mi1, ma1, mi, ma);
00474 }
00475
00476 itsProtoSaliencyMap = squared(itsProtoSaliencyMap);
00477
00478 if (itsAttenuationWidth.getVal() > 0.0)
00479 {
00480 const int w = int(0.5
00481 + itsAttenuationWidth.getVal()
00482 * itsProtoSaliencyMap.getDims().max());
00483
00484 inplaceAttenuateBorders(itsProtoSaliencyMap, w);
00485 }
00486
00487 float fw =
00488 itsOutputBlurFactor.getVal() > 0.0
00489 ? itsOutputBlurFactor.getVal() * itsProtoSaliencyMap.getWidth()
00490 : itsOutputBlur.getVal();
00491
00492 float fh =
00493 itsOutputBlurFactor.getVal() > 0.0
00494 ? itsOutputBlurFactor.getVal() * itsProtoSaliencyMap.getHeight()
00495 : itsOutputBlur.getVal();
00496
00497 itsOutput = itsProtoSaliencyMap;
00498
00499 const Dims mapdims = this->getMapDims();
00500 ASSERT(mapdims.isNonEmpty());
00501
00502
00503
00504
00505
00506
00507 while (itsOutput.getWidth() >= mapdims.w()*2
00508 && itsOutput.getHeight() >= mapdims.h()*2)
00509 {
00510 itsOutput = quickLocalAvg2x2(itsOutput);
00511 fw /= 2.0f;
00512 fh /= 2.0f;
00513 }
00514
00515 Image<float> wfilt = gaussian<float>(0.0f, fw, 0, 1.0f);
00516 wfilt = wfilt / float(sum(wfilt));
00517
00518 Image<float> hfilt = gaussian<float>(0.0f, fh, 0, 1.0f);
00519 hfilt = hfilt / float(sum(hfilt));
00520
00521 LDEBUG("wfilt is %dx%d, hfilt is %dx%d",
00522 wfilt.getWidth(), wfilt.getHeight(),
00523 hfilt.getWidth(), hfilt.getHeight());
00524
00525 itsOutput = sepFilter(itsOutput, wfilt, hfilt,
00526 CONV_BOUNDARY_ZERO);
00527
00528
00529
00530 itsOutput = rescaleBilinear(itsOutput, mapdims);
00531
00532 itsOutput = maxNormalize(itsOutput,
00533 itsOutputRangeMin.getVal(),
00534 itsOutputRangeMax.getVal(),
00535 itsNormType.getVal());
00536 LINFO("%s OK: in=%dx%d -> resize %s -> internal=%dx%d -> resize %s -> out=%dx%d; "
00537 "spectral blur %u; output blur %gx%g; lpwidth %d; atten width %g",
00538 this->descriptiveName().c_str(),
00539 itsInput.getWidth(), itsInput.getHeight(),
00540 convertToString(itsResizeSpec.getVal()).c_str(),
00541 newdims.w(), newdims.h(),
00542 itsDoResizeOutput.getVal()
00543 ? convertToString(itsOutputResizeSpec.getVal()).c_str()
00544 : "skip",
00545 mapdims.w(), mapdims.h(),
00546 itsSpectralBlur.getVal(), fw, fh,
00547 itsDownSizeFilterWidth.getVal(),
00548 itsAttenuationWidth.getVal());
00549 }
00550
00551 return itsOutput;
00552 }
00553
00554
00555 void SpectralResidualChannel::saveResults(const nub::ref<FrameOstream>& ofs)
00556 {
00557 const std::string tag = this->tagName();
00558
00559 if (itsSaveOutput.getVal())
00560 ofs->writeFloat(this->getOutput(), FLOAT_NORM_PRESERVE,
00561 tag+"-",
00562 FrameInfo("Spectral Residual output",
00563 SRC_POS));
00564
00565 if (itsSaveExtraOutput.getVal())
00566 {
00567 ofs->writeGray(Image<byte>(itsInput), tag+"-input",
00568 FrameInfo("Spectral Residual input", SRC_POS));
00569
00570 ofs->writeFloat(itsRescaledInput, FLOAT_NORM_PRESERVE,
00571 tag+"-rescaled-input",
00572 FrameInfo("Spectral Residual rescaled input", SRC_POS));
00573
00574 ofs->writeFloat(itsLogMagnitude, FLOAT_NORM_PRESERVE,
00575 tag+"-logmagnitude",
00576 FrameInfo("Spectral Residual fft log-magnitude",
00577 SRC_POS));
00578
00579 ofs->writeFloat(itsPhase, FLOAT_NORM_PRESERVE,
00580 tag+"-phase",
00581 FrameInfo("Spectral Residual fft phase",
00582 SRC_POS));
00583
00584 ofs->writeFloat(itsSpectralResidual, FLOAT_NORM_PRESERVE,
00585 tag+"-spectral-residual",
00586 FrameInfo("Spectral Residual fft spectral residual",
00587 SRC_POS));
00588
00589 ofs->writeFloat(itsProtoSaliencyMap, FLOAT_NORM_PRESERVE,
00590 tag+"-proto-saliency",
00591 FrameInfo("Spectral Residual proto-saliency",
00592 SRC_POS));
00593 }
00594 }
00595
00596
00597 void SpectralResidualChannel::killCaches()
00598 {
00599 itsOutput = Image<float>();
00600
00601
00602 itsRescaledInput.freeMem();
00603 itsLogMagnitude.freeMem();
00604 itsPhase.freeMem();
00605 itsSpectralResidual.freeMem();
00606 itsProtoSaliencyMap.freeMem();
00607 }
00608
00609
00610 void SpectralResidualChannel::doInput(const InputFrame& inframe)
00611 {
00612 if (!inframe.grayFloat().initialized())
00613 LFATAL("Oops! I need luminance input");
00614
00615 itsInput = inframe.grayFloat();
00616 }
00617
00618
00619
00620
00621
00622
00623
00624
00625 #endif // CHANNELS_SPECTRALRESIDUALCHANNEL_C_DEFINED