ScaleSpace.C

/*!@file SIFT/ScaleSpace.C Keypoint computation for SIFT obj recognition */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: James Bonaiuto <bonaiuto@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/ScaleSpace.C $
// $Id: ScaleSpace.C 10423 2008-11-12 22:26:07Z mviswana $
//

#include "SIFT/ScaleSpace.H"

#include "Image/FilterOps.H"
#include "Image/Kernels.H"
#include "Image/MathOps.H"
#include "Image/MatrixOps.H"
#include "Image/DrawOps.H"
#include "SIFT/Histogram.H"
#include "SIFT/Keypoint.H"
#include "SIFT/FeatureVector.H"
#include "rutz/compat_cmath.h"
#include "Image/fancynorm.H"

const int   EXT          = 10;     //!< keep-out edge width
const float R_EDGE       = 8.0F;   //!< edge response threshold
const float PEAK_THRESH  = 2.0F;   //!< peaks below this value are discarded
const int   ORIENTARRAY  = 36;     //!< size of orientation array


// ######################################################################
ScaleSpace::ScaleSpace(const ImageSet<float>& in, const float octscale,
                const int s, const float sigma, bool useColor) :
        itsOctScale(octscale), itsSigma(sigma), itsLumBlur(s + 3), itsRGBlur(s+3),
        itsBYBlur(s+3), itsDog(s + 2), itsUseColor(useColor)
{
        ASSERT(s > 0); ASSERT(octscale > 0.0F); ASSERT(sigma > 0.0F);

        const float k = powf(2.0f, 1.0f / s);

        // our bottom image is just the original. We assume here that the
        // caller has pre-blurred it if necessary. See the constructor of
        // VisualObject for what we mean by that:
        itsLumBlur[0] = in[LUM_CHANNEL];

        if (itsUseColor){
                itsRGBlur[0] = in[RG_CHANNEL];
                itsBYBlur[0] = in[BY_CHANNEL];
        }

        // compute the additional gaussian blurred maps:
        for (int ss = 1; ss < s+3; ++ss)
        {
                // compute the stdev of the Gaussian we should convolve the
                // previous blurred image by, so as to multiply the total
                // effective blurring sigma by a factor k:
                const float std = sigma * powf(k, float(ss-1)) * sqrt(k*k - 1.0F);

                // get a Gaussian kernel with that stdev, normalized to unit sum:
                Image<float> kernel = gaussian<float>(1.0F, std, itsLumBlur[0].getWidth(), 1.0F);
                kernel = kernel / float(sum(kernel));

                // do the convolution:
                itsLumBlur[ss] = sepFilter(itsLumBlur[ss-1], kernel, kernel,
                                           CONV_BOUNDARY_CLEAN);

                if (itsUseColor){
                  itsRGBlur[ss] = sepFilter(itsRGBlur[ss-1], kernel, kernel,
                                            CONV_BOUNDARY_CLEAN);
                  itsBYBlur[ss] = sepFilter(itsBYBlur[ss-1], kernel, kernel,
                                            CONV_BOUNDARY_CLEAN);
                }
        }

        // compute the difference-of-gaussian images:
        for (int ss = 0; ss < s+2; ++ss)
                itsDog[ss] = itsLumBlur[ss+1] - itsLumBlur[ss];


        /*
        // apply max-normalization:
#define NORMTYPE VCXNORM_MAXNORM
        for (int ss = 0; ss < s+3; ++ss){
                itsLumBlur[ss] = maxNormalize(itsLumBlur[ss], 0.0F, 2.0F, NORMTYPE);
                itsRGBlur[ss]  = maxNormalize(itsRGBlur[ss], 0.0F, 2.0F, NORMTYPE);
                itsBYBlur[ss]  = maxNormalize(itsBYBlur[ss], 0.0F, 2.0F, NORMTYPE);
        }
        */
}
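
// Illustrative sketch (not part of the original toolkit source): why the
// incremental blur in the constructor uses std = sigma * k^(ss-1) * sqrt(k*k - 1).
// Convolving two Gaussians adds their variances, so to move the effective blur
// from sigma*k^(ss-1) to sigma*k^ss, the extra kernel must carry the variance
// difference between those two. The helper name below is hypothetical.
#if 0
static float incrementalSigmaResidual(const float sigma, const int s)
{
        const float k = powf(2.0F, 1.0F / s);
        float worst = 0.0F;
        for (int ss = 1; ss < s + 3; ++ss)
        {
                const float prev   = sigma * powf(k, float(ss - 1)); // blur already applied
                const float target = sigma * powf(k, float(ss));     // blur we want to reach
                const float extra  = prev * sqrtf(k*k - 1.0F);       // kernel stdev used above
                // variances must add up: prev^2 + extra^2 == target^2
                const float resid  = fabsf(prev*prev + extra*extra - target*target);
                if (resid > worst) worst = resid;                     // should stay ~0
        }
        return worst;
}
#endif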

// ######################################################################
ScaleSpace::~ScaleSpace()
{ }

// ######################################################################
Image<float> ScaleSpace::getTwoSigmaImage(int channel) const
{
        //LINFO("Channel select");
        switch (channel){
                case LUM_CHANNEL:
                        return itsLumBlur.getImage(itsLumBlur.size() - 3);
                case RG_CHANNEL:
                        return itsRGBlur.getImage(itsRGBlur.size() - 3);
                case BY_CHANNEL:
                        return itsBYBlur.getImage(itsBYBlur.size() - 3);
        }

        LINFO("Invalid channel %i", channel);
        ASSERT(false);
        return itsLumBlur.getImage(itsLumBlur.size() - 3);
}


// ######################################################################
uint ScaleSpace::getNumBlurredImages() const
{ return itsLumBlur.size(); }

// ######################################################################
Image<float> ScaleSpace::getBlurredImage(const uint idx) const
{ return itsLumBlur.getImage(idx); }

// ######################################################################
uint ScaleSpace::getNumDoGImages() const
{ return itsDog.size(); }

// ######################################################################
Image<float> ScaleSpace::getDoGImage(const uint idx) const
{ return itsDog.getImage(idx); }

// ######################################################################
uint ScaleSpace::findKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints)
{
        int w = itsLumBlur[0].getWidth(), h = itsLumBlur[0].getHeight();
        Image<byte> analyzed(w, h, ZEROS); // keep track of visited locations
        uint numkp = 0;

        // loop through blurred images in scale space:
        for (uint sc = 1; sc <= itsLumBlur.size()-3; sc++)
        {
                LDEBUG("Processing octave %.2f scale %d/%d", itsOctScale,
                                int(sc), int(itsLumBlur.size()) - 3);

                // compute magnitude and orientation of the gradient of the
                // blurred image for the scale of interest:
                ImageSet<float> gradmag(3), gradori(3);
                gradient(itsLumBlur[sc], gradmag[LUM_CHANNEL], gradori[LUM_CHANNEL]);

                if (itsUseColor){
                        gradient(itsRGBlur[sc], gradmag[RG_CHANNEL], gradori[RG_CHANNEL]);
                        gradient(itsBYBlur[sc], gradmag[BY_CHANNEL], gradori[BY_CHANNEL]);
                }


                // for every pixel in the image, except near the edges:
                for (int x = EXT; x < w - EXT; x++)
                        for (int y = EXT; y < h - EXT; y++)
                                // check for maximum or minimum of DoG in scale space;
                                // if found, trigger accurate localization and pruning
                                // of keypoint:
                                if (checkForMinMax(x, y, itsDog[sc-1], itsDog[sc], itsDog[sc+1]))
                                        numkp += accurateLocalizationAndPruning(x, y, sc, analyzed,
                                                        gradmag, gradori,
                                                        keypoints);
        }
        return numkp;
}
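
// Illustrative usage sketch (not part of the original toolkit source): how a
// caller might build one ScaleSpace octave and collect its keypoints. The
// channel layout (LUM/RG/BY) and constructor arguments are the ones used in
// this file; the input channels are assumed to have been prepared and
// pre-blurred by the caller (see VisualObject), and the numeric values
// (octscale 1.0, s = 3, sigma = 1.6) are only examples.
#if 0
static uint exampleFindKeypoints(const Image<float>& lum,
                                 const Image<float>& rg,
                                 const Image<float>& by)
{
        ImageSet<float> channels(3);
        channels[LUM_CHANNEL] = lum;
        channels[RG_CHANNEL]  = rg;
        channels[BY_CHANNEL]  = by;

        // one octave at original scale, 3 intervals per octave, with color:
        ScaleSpace ss(channels, 1.0F, 3, 1.6F, true);

        std::vector< rutz::shared_ptr<Keypoint> > keypoints;
        return ss.findKeypoints(keypoints);
}
#endif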

// ######################################################################

// Custom and highly irregular version of SIFT keypoint creation. This
// was written specifically for the Lazebnik Beyond Bags-of-Features
// paper. Works only for level zero. Thou hast been warned.
//
// For more info, see src/Neuro/GistEstimatorBeyondBoF.H and .C; also,
// src/Gist/train-bbof.C.
static void
pixelPatchCreateKeypoint(const float x, const float y, const float sigma,
                         const float dogmag, const Image<float>& gradmag,
                         const Image<float>& gradorie,
                         std::vector<rutz::shared_ptr<Keypoint> >& keypoints)
{
   const int xi = int(x + 0.5f);
   const int yi = int(y + 0.5f);

   FeatureVector fv;

   // check this scale
   const int radius = int(5.0f * sigma + 0.5f); // NOTE: Lowe uses radius=8?
   const float gausssig = float(radius); // 1/2 width of descript window
   const float gaussfac = -0.5f/(gausssig * gausssig);

   // Scan a window of diameter 2*radius+1 around the point of
   // interest, and accumulate local samples into a 4x4 grid
   // of bins, with interpolation. NOTE: rx and ry loop over a
   // square that is assumed centered around the point of interest.
   for (int ry = -radius; ry < radius; ry++)
      for (int rx = -radius; rx < radius; rx++)
      {
         // get the coords in the image frame of reference:
         const float orgX = rx + float(xi);
         const float orgY = ry + float(yi);

         if (! gradmag.coordsOk(orgX, orgY)) // outside image
            continue; // forget this coordinate

         // find the fractional coords of the corresponding bin
         // (we subdivide our window into a 4x4 grid of bins):
         const float xf = 2.0f + 2.0f * float(rx)/float(radius);
         const float yf = 2.0f + 2.0f * float(ry)/float(radius);

         // find the Gaussian weight from distance to center and
         // get weighted gradient magnitude:
         const float gaussFactor = expf((rx*rx + ry*ry) * gaussfac);
         const float weightedMagnitude =
            gaussFactor * gradmag.getValInterp(orgX, orgY);

         // get the gradient orientation relative to the keypoint
         // orientation and scale it for 8 orientation bins:
         float gradAng = gradorie.getValInterp(orgX, orgY);
         gradAng = fmod(gradAng, 2*M_PI);   // wrap into (-2pi, 2pi)

         if (gradAng < 0.0)
            gradAng += 2*M_PI;              // now in [0, 2pi)
         if (gradAng >= M_PI)
            gradAng -= 2*M_PI;              // now in [-pi, pi)
         // split into eight orientation bins:
         const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);

         // will be interpolated into 2 x 2 x 2 bins:
         fv.addValue(xf, yf, orient, weightedMagnitude);
      }

   // normalize, clamp, scale and convert to byte:
   std::vector<byte> oriVec;
   fv.toByteKey(oriVec);

   rutz::shared_ptr<Keypoint>
      newkey(new Keypoint(oriVec, x, y, sigma, 0, dogmag));
   keypoints.push_back(newkey);
}
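
// Illustrative sketch (not part of the original toolkit source): the angle
// wrapping used above, isolated for clarity. fmod() leaves a value in
// (-2pi, 2pi); the two fixups then bring it into [-pi, pi), and
// (ang + pi) * 8 / (2pi) maps it onto a fractional 8-bin index in [0, 8).
// The helper name is hypothetical.
#if 0
static float angleToOrientationBin(float gradAng)
{
        gradAng = fmod(gradAng, 2*M_PI);          // (-2pi, 2pi)
        if (gradAng < 0.0)   gradAng += 2*M_PI;   // [0, 2pi)
        if (gradAng >= M_PI) gradAng -= 2*M_PI;   // [-pi, pi)
        return (gradAng + M_PI) * 8 / (2 * M_PI); // fractional bin in [0, 8)
}
#endif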

uint
ScaleSpace::
getGridKeypoints(std::vector<rutz::shared_ptr<Keypoint> >& keypoints)
{
   // compute magnitude and orientation of the gradient of the
   // blurred image for the scale of interest:
   Image<float> gradmag, gradori;
   gradient(itsLumBlur[0], gradmag, gradori);

   int w = itsLumBlur[0].getWidth(),
       h = itsLumBlur[0].getHeight();
   uint numkp = 0;
   const int radius = int(5.0f * itsSigma + 0.5f);
   for (int y = radius; y < h - radius + 1; y += radius)
      for (int x = radius; x < w - radius + 1; x += radius) {
         pixelPatchCreateKeypoint(x, y, itsSigma, 1,
                                  gradmag, gradori, keypoints);
         ++numkp;
      }

   return numkp;
}

// ######################################################################
uint ScaleSpace::
accurateLocalizationAndPruning(const int x, const int y, const int s,
                Image<byte>& analyzed,
                const ImageSet<float>& gradmag,
                const ImageSet<float>& gradori,
                std::vector< rutz::shared_ptr<Keypoint> >& keypoints)
{
        // Iterative 3D interpolation (x direction, y direction, and scale
        // direction). We do a 3D fit of this extremum. The resulting dpos
        // contains the accurate fractional location of the extremum
        // relative to (new_x, new_y, s), and dDog holds the (negated)
        // first derivative of the DoG used as right-hand side of that fit:
        int new_x = x, new_y = y, new_s = s;
        Image<float> dDog, dpos; float dx2, dy2, dxdy;
        dpos = fit3D(new_x, new_y, s, dDog, dx2, dy2, dxdy);

        // give up on some overflow cases:
        if (fabs(dpos.getVal(0)) > 1.5F) return 0;
        if (fabs(dpos.getVal(1)) > 1.5F) return 0;
        if (fabs(dpos.getVal(2)) > 1.5F) return 0;

        // If dpos is larger than 0.5 in the x or y direction, we should move
        // our keypoint to a better location and try our 3D fit again:
        bool moved = false;
        if (dpos.getVal(0) > 0.5)       { ++new_x; moved = true; }
        else if (dpos.getVal(0) < -0.5) { --new_x; moved = true; }

        if (dpos.getVal(1) > 0.5)       { ++new_y; moved = true; }
        else if (dpos.getVal(1) < -0.5) { --new_y; moved = true; }

        if (dpos.getVal(2) > 0.5) {
                if (new_s >= int(itsDog.size()) - 2) return 0; // can't go higher in scale
                ++new_s; moved = true;
        } else if (dpos.getVal(2) < -0.5) {
                if (new_s <= 1) return 0; // can't go lower in scale
                --new_s; moved = true;
        }

        // if we moved, recompute the 3D fit:
        if (moved)
        {
                dpos = fit3D(new_x, new_y, new_s, dDog, dx2, dy2, dxdy);

                // give up if we can't get close:
                if (fabs(dpos.getVal(0)) > 0.5F) return 0;
                if (fabs(dpos.getVal(1)) > 0.5F) return 0;
                if (fabs(dpos.getVal(2)) > 0.5F) return 0;
        }

        // if already analyzed then quit, otherwise mark as analyzed:
        if (analyzed.getVal(new_x, new_y)) return 0;
        analyzed.setVal(new_x, new_y, 255);

        // here are our new fractional coordinates:
        const float xf = float(new_x) + float(dpos.getVal(0));
        const float yf = float(new_y) + float(dpos.getVal(1));
        const float sf = float(new_s) + float(dpos.getVal(2));

        // calculate the new peak height, using a linear approximation:
        const float peak_height =
                itsDog[new_s].getVal(new_x, new_y) + 0.5F * dotprod(dpos, dDog);

        // forget about that keypoint if the DoG value is too small
        // (low-contrast keypoint):
        if (fabsf(peak_height) < PEAK_THRESH) return 0;

        // prune strong edge responses; this determinant will be used as
        // denominator in the edge test below, so reject near-singular cases:
        if (fabs(dx2 * dy2 - dxdy * dxdy) < 1.0e-5F) return 0;

        // edge test, to eliminate keypoints along strong edges:
        if ((dx2+dy2)*(dx2+dy2) / fabs(dx2 * dy2 - dxdy * dxdy) >=
                        (R_EDGE + 1.0F) * (R_EDGE + 1.0F) / R_EDGE) return 0;

        // if we have reached this point, we got a stable keypoint! Create
        // its feature vector:
        return createKeypoints(xf, yf, sf, peak_height, gradmag, gradori, keypoints);
}
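
// Illustrative sketch (not part of the original toolkit source): the
// edge-response pruning used above, in isolation. With the 2x2 Hessian
// H = [dx2 dxdy; dxdy dy2], trace(H)^2 / det(H) grows with the ratio of the
// two principal curvatures; edge-like points have one large and one small
// curvature and are rejected when this ratio reaches (R_EDGE+1)^2 / R_EDGE
// (Lowe's test). The helper name is hypothetical.
#if 0
static bool passesEdgeTest(const float dx2, const float dy2, const float dxdy)
{
        const float det = dx2 * dy2 - dxdy * dxdy;
        if (fabsf(det) < 1.0e-5F) return false;   // near-singular Hessian
        const float tr = dx2 + dy2;
        return (tr * tr) / fabsf(det) <
               (R_EDGE + 1.0F) * (R_EDGE + 1.0F) / R_EDGE;
}
#endif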

// ######################################################################
bool ScaleSpace::checkForMinMax(const int x, const int y,
                const Image<float>& im0,
                const Image<float>& im1,
                const Image<float>& im2) const
{
        // get value:
        float val = im1.getVal(x, y);

        // if below peak threshold just give up:
        if (fabsf(val) < PEAK_THRESH) return false;

        // verify for max or min:
        if (val < im1.getVal(x-1, y))
        {
                // ok, then let's check for a minimum:
                if (val >= im1.getVal(x-1, y-1)) return false;
                if (val >= im1.getVal(x-1, y+1)) return false;
                if (val >= im1.getVal(x  , y-1)) return false;
                if (val >= im1.getVal(x  , y+1)) return false;
                if (val >= im1.getVal(x+1, y-1)) return false;
                if (val >= im1.getVal(x+1, y  )) return false;
                if (val >= im1.getVal(x+1, y+1)) return false;

                // check for min level -1:
                if (val >= im0.getVal(x-1, y-1)) return false;
                if (val >= im0.getVal(x-1, y  )) return false;
                if (val >= im0.getVal(x-1, y+1)) return false;
                if (val >= im0.getVal(x  , y-1)) return false;
                if (val >= im0.getVal(x  , y  )) return false;
                if (val >= im0.getVal(x  , y+1)) return false;
                if (val >= im0.getVal(x+1, y-1)) return false;
                if (val >= im0.getVal(x+1, y  )) return false;
                if (val >= im0.getVal(x+1, y+1)) return false;

                // check for min level +1:
                if (val >= im2.getVal(x-1, y-1)) return false;
                if (val >= im2.getVal(x-1, y  )) return false;
                if (val >= im2.getVal(x-1, y+1)) return false;
                if (val >= im2.getVal(x  , y-1)) return false;
                if (val >= im2.getVal(x  , y  )) return false;
                if (val >= im2.getVal(x  , y+1)) return false;
                if (val >= im2.getVal(x+1, y-1)) return false;
                if (val >= im2.getVal(x+1, y  )) return false;
                if (val >= im2.getVal(x+1, y+1)) return false;

                return true;
        }
        else if (val > im1.getVal(x-1, y))
        {
                // check for maximum:
                if (val <= im1.getVal(x-1, y-1)) return false;
                if (val <= im1.getVal(x-1, y+1)) return false;
                if (val <= im1.getVal(x  , y-1)) return false;
                if (val <= im1.getVal(x  , y+1)) return false;
                if (val <= im1.getVal(x+1, y-1)) return false;
                if (val <= im1.getVal(x+1, y  )) return false;
                if (val <= im1.getVal(x+1, y+1)) return false;

                // check for max level -1:
                if (val <= im0.getVal(x-1, y-1)) return false;
                if (val <= im0.getVal(x-1, y  )) return false;
                if (val <= im0.getVal(x-1, y+1)) return false;
                if (val <= im0.getVal(x  , y-1)) return false;
                if (val <= im0.getVal(x  , y  )) return false;
                if (val <= im0.getVal(x  , y+1)) return false;
                if (val <= im0.getVal(x+1, y-1)) return false;
                if (val <= im0.getVal(x+1, y  )) return false;
                if (val <= im0.getVal(x+1, y+1)) return false;

                // check for max level +1:
                if (val <= im2.getVal(x-1, y-1)) return false;
                if (val <= im2.getVal(x-1, y  )) return false;
                if (val <= im2.getVal(x-1, y+1)) return false;
                if (val <= im2.getVal(x  , y-1)) return false;
                if (val <= im2.getVal(x  , y  )) return false;
                if (val <= im2.getVal(x  , y+1)) return false;
                if (val <= im2.getVal(x+1, y-1)) return false;
                if (val <= im2.getVal(x+1, y  )) return false;
                if (val <= im2.getVal(x+1, y+1)) return false;

                return true;
        }

        return false;
}
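
// Illustrative sketch (not part of the original toolkit source): the unrolled
// 26-neighbor comparison above, written as loops. A DoG sample qualifies when
// it is a strict minimum or strict maximum over its 3x3x3 neighborhood
// (excluding itself) across the three adjacent DoG levels. The helper name is
// hypothetical; it is meant to be equivalent to checkForMinMax() above.
#if 0
static bool isLocalExtremum(const int x, const int y,
                            const Image<float>& im0,
                            const Image<float>& im1,
                            const Image<float>& im2)
{
        const float val = im1.getVal(x, y);
        if (fabsf(val) < PEAK_THRESH) return false;

        bool isMin = true, isMax = true;
        const Image<float>* levels[3] = { &im0, &im1, &im2 };
        for (int l = 0; l < 3; ++l)
                for (int dy = -1; dy <= 1; ++dy)
                        for (int dx = -1; dx <= 1; ++dx)
                        {
                                if (l == 1 && dx == 0 && dy == 0) continue; // skip center
                                const float v = levels[l]->getVal(x + dx, y + dy);
                                if (val >= v) isMin = false;
                                if (val <= v) isMax = false;
                        }
        return isMin || isMax;
}
#endif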

// ######################################################################
//
// Fit in 3D
//   in the (X, Y, S) space, fit the peak to find its best extremum;
//   returns the solution; dDog holds the (negated) first derivative of the DoG
//
Image<float> ScaleSpace::fit3D(const int x, const int y, const int s,
                Image<float>& dDog, float& dX2,
                float& dY2, float& dXdY) const
{
        Image<float> below = itsDog[s - 1];
        Image<float> cur   = itsDog[s];
        Image<float> above = itsDog[s + 1];

        // standard approximations of first derivatives of the DoG:
        float dX = 0.5F * (cur.getVal(x+1, y) - cur.getVal(x-1, y));
        float dY = 0.5F * (cur.getVal(x, y+1) - cur.getVal(x, y-1));
        float dS = 0.5F * (above.getVal(x, y) - below.getVal(x, y));

        // standard approximations of second derivatives of the DoG:
        dX2 = cur.getVal(x-1, y) - 2.0F*cur.getVal(x, y) + cur.getVal(x+1, y);
        dY2 = cur.getVal(x, y-1) - 2.0F*cur.getVal(x, y) + cur.getVal(x, y+1);
        float dS2 = below.getVal(x, y) - 2.0F*cur.getVal(x, y) + above.getVal(x, y);

        // standard approximation of crossed derivatives of the DoG:
        dXdY  = 0.25F * (cur.getVal(x+1, y+1) - cur.getVal(x-1, y+1)) -
                0.25F * (cur.getVal(x+1, y-1) - cur.getVal(x-1, y-1));
        float dSdX = 0.25F * (above.getVal(x+1, y) - above.getVal(x-1, y)) -
                0.25F * (below.getVal(x+1, y) - below.getVal(x-1, y));
        float dSdY = 0.25F * (above.getVal(x, y+1) - above.getVal(x, y-1)) -
                0.25F * (below.getVal(x, y+1) - below.getVal(x, y-1));

        // matrix to solve is:
        // dX         [ dX2  dXdY dXdS ]
        // dY = [ V ] [ dXdY dY2  dYdS ]
        // dS         [ dXdS dYdS dS2  ]
        dDog.resize(1, 3);
        dDog.setVal(0, -dX); dDog.setVal(1, -dY); dDog.setVal(2, -dS);

        Image<float> mat(3, 3, NO_INIT);
        mat.setVal(0, 0,  dX2); mat.setVal(1, 1,  dY2); mat.setVal(2, 2,  dS2);
        mat.setVal(1, 0, dXdY); mat.setVal(0, 1, dXdY);
        mat.setVal(2, 0, dSdX); mat.setVal(0, 2, dSdX);
        mat.setVal(2, 1, dSdY); mat.setVal(1, 2, dSdY);

        Image<float> result;
        try
          {
            mat = matrixInv(mat);
            result = matrixMult(mat, dDog);
          }
        catch (SingularMatrixException& e)
          {
            LDEBUG("Couldn't invert matrix -- RETURNING [ 2.0 2.0 2.0 ]^T");
            result.resize(1, 3);
            result.setVal(0, 0, 2.0);
            result.setVal(0, 1, 2.0);
            result.setVal(0, 2, 2.0);
          }
        return result;
}
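
// Illustrative sketch (not part of the original toolkit source): what fit3D()
// solves. With g = (dX, dY, dS) the finite-difference gradient and H the
// symmetric 3x3 Hessian assembled above, the sub-pixel offset is the solution
// of H * offset = -g. Here it is solved directly by Cramer's rule instead of
// matrixInv()/matrixMult(), purely for illustration; the [2,2,2] fallback
// mirrors the singular-matrix case above. The helper name is hypothetical.
#if 0
static void solveOffset(const float H[3][3], const float g[3], float offset[3])
{
        // determinant of the 3x3 Hessian:
        const float det =
                H[0][0] * (H[1][1]*H[2][2] - H[1][2]*H[2][1]) -
                H[0][1] * (H[1][0]*H[2][2] - H[1][2]*H[2][0]) +
                H[0][2] * (H[1][0]*H[2][1] - H[1][1]*H[2][0]);
        if (fabsf(det) < 1.0e-10F)
        { offset[0] = offset[1] = offset[2] = 2.0F; return; }

        // Cramer's rule: replace column i by -g and take determinants:
        const float b[3] = { -g[0], -g[1], -g[2] };
        for (int i = 0; i < 3; ++i)
        {
                float M[3][3];
                for (int r = 0; r < 3; ++r)
                        for (int c = 0; c < 3; ++c)
                                M[r][c] = (c == i) ? b[r] : H[r][c];
                const float di =
                        M[0][0] * (M[1][1]*M[2][2] - M[1][2]*M[2][1]) -
                        M[0][1] * (M[1][0]*M[2][2] - M[1][2]*M[2][0]) +
                        M[0][2] * (M[1][0]*M[2][1] - M[1][1]*M[2][0]);
                offset[i] = di / det;
        }
}
#endif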


// ######################################################################

void ScaleSpace::calculateOrientationVector(const float x, const float y, const float s,
                const ImageSet<float>& gradmag, const ImageSet<float>& gradorie, Histogram& OV) {


        // compute the effective blurring sigma corresponding to the
        // fractional scale s:
        const float sigma = itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3));

        const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig);
        const int dimX = gradmag[LUM_CHANNEL].getWidth(), dimY = gradmag[LUM_CHANNEL].getHeight();

        const int xi = int(x + 0.5f);
        const int yi = int(y + 0.5f);

        const int rad = int(3.0F * sig);
        const int rad2 = rad * rad;


        // search bounds:
        int starty = yi - rad; if (starty < 0) starty = 0;
        int stopy = yi + rad; if (stopy >= dimY) stopy = dimY-1;

        // 1. Calculate orientation vector
        for (int ind_y = starty; ind_y <= stopy; ind_y ++)
        {
                // given that y, get the corresponding range of x values that
                // lie within the disk (and remain within the image):
                const int yoff = ind_y - yi;
                const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F);
                int startx = xi - bound; if (startx < 0) startx = 0;
                int stopx = xi + bound; if (stopx >= dimX) stopx = dimX-1;

                for (int ind_x = startx; ind_x <= stopx; ind_x ++)
                {
                        const float dx = float(ind_x) - x, dy = float(ind_y) - y;
                        const float distSq = dx * dx + dy * dy;

                        // get gradient:
                        const float gradVal = gradmag[LUM_CHANNEL].getVal(ind_x, ind_y);

                        // compute the gaussian weight for this histogram entry:
                        const float gaussianWeight = expf(distSq * inv2sig2);

                        // add this orientation to the histogram
                        // [-pi ; pi] -> [0 ; 2pi]
                        float angle = gradorie[LUM_CHANNEL].getVal(ind_x, ind_y) + M_PI;

                        // [0 ; 2pi] -> [0 ; 36]
                        angle = 0.5F * angle * ORIENTARRAY / M_PI;
                        while (angle < 0.0F) angle += ORIENTARRAY;
                        while (angle >= ORIENTARRAY) angle -= ORIENTARRAY;

                        OV.addValueInterp(angle, gaussianWeight * gradVal);
                }
        }


        // smooth the orientation histogram 3 times:
        for (int i = 0; i < 3; i++) OV.smooth();
}

// ######################################################################


uint ScaleSpace::createVectorsAndKeypoints(const float x, const float y, const float s,
                const float dogmag, const ImageSet<float>& gradmag,
                const ImageSet<float>& gradorie, std::vector < rutz::shared_ptr<Keypoint> >& keypoints, Histogram& OV)
{

        const float sigma = itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3));

        // find the max in the histogram:
        float maxPeakValue = OV.findMax();

        const int xi = int(x + 0.5f);
        const int yi = int(y + 0.5f);

        uint numkp = 0;

        // 2. Create feature vector and keypoint for each significant
        // orientation peak:
        for (int bin = 0; bin < ORIENTARRAY; bin++)
        {
                // consider the peak centered around 'bin':
                const float midval = OV.getValue(bin);

                // if current value much smaller than global peak, forget it:
                if (midval < 0.8F * maxPeakValue) continue;

                // get value to the left of current value
                const float leftval = OV.getValue((bin == 0) ? ORIENTARRAY-1 : bin-1);

                // get value to the right of current value
                const float rightval = OV.getValue((bin == ORIENTARRAY-1) ? 0 : bin+1);

                // only consider local peaks:
                if (leftval >= midval) continue;
                if (rightval >= midval) continue;

                // interpolate the values to get the orientation of the peak:
                //  with f(x) = ax^2 + bx + c
                //   f(-1) = x0 = leftval
                //   f( 0) = x1 = midval
                //   f(+1) = x2 = rightval
                //  => a = (x0+x2)/2 - x1
                //     b = (x2-x0)/2
                //     c = x1
                // f'(x) = 0 => x = -b/2a
                const float a  = 0.5f * (leftval + rightval) - midval;
                const float b  = 0.5f * (rightval - leftval);
                float realangle = float(bin) - 0.5F * b / a;

                realangle *= 2.0F * M_PI / ORIENTARRAY; // [0:36] to [0:2pi]
                realangle -= M_PI;                      // [0:2pi] to [-pi:pi]

                // ############ Create keypoint:

                // compute the feature vector:
                FeatureVector fv;
                FeatureVector Colfv(4, 4, 3, false); // create 4x4x3 bins with no wrap

                const float sinAngle = sin(realangle), cosAngle = cos(realangle);

                // check this scale
                const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8?
                const float gausssig = float(radius); // 1/2 width of descript window
                const float gaussfac = - 0.5F / (gausssig * gausssig);


                // Scan a window of diameter 2*radius+1 around the point of
                // interest, and accumulate local samples into a 4x4 grid
                // of bins, with interpolation. NOTE: rx and ry loop over a
                // square that is assumed centered around the point of interest
                // and rotated to the gradient orientation (realangle):

                int scale = abs(int(s));
                scale = scale > 5 ? 5 : scale;

                float maxRG=-9999, minRG=9999, maxBY=-9999, minBY=9999, maxBW=-9999, minBW=9999;

                for (int ry = -radius; ry <= radius; ry++)
                        for (int rx = -radius; rx <= radius; rx++)
                        {
                                // rotate the point:
                                const float newX = rx * cosAngle - ry * sinAngle;
                                const float newY = rx * sinAngle + ry * cosAngle;

                                // get the coords in the image frame of reference:
                                const float orgX = newX + float(xi);
                                const float orgY = newY + float(yi);

                                // if outside the image, forget it:
                                if (gradmag[LUM_CHANNEL].coordsOk(orgX, orgY) == false) continue;

                                // find the fractional coords of the corresponding bin
                                // (we subdivide our window into a 4x4 grid of bins):
                                const float xf = 2.0F + 2.0F * float(rx) / float(radius);
                                const float yf = 2.0F + 2.0F * float(ry) / float(radius);


                                // find the Gaussian weight from distance to center and
                                // get weighted gradient magnitude:
                                const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac);
                                const float weightedMagnitude =
                                        gaussFactor * gradmag[LUM_CHANNEL].getValInterp(orgX, orgY);

                                // get the gradient orientation relative to the keypoint
                                // orientation and scale it for 8 orientation bins:
                                float gradAng = gradorie[LUM_CHANNEL].getValInterp(orgX, orgY) - realangle;

                                gradAng = fmod(gradAng, 2*M_PI);        // wrap into (-2pi, 2pi)

                                if (gradAng < 0.0) gradAng += 2*M_PI;   // now in [0, 2pi)
                                if (gradAng >= M_PI) gradAng -= 2*M_PI; // now in [-pi, pi)
                                // split into eight orientation bins:
                                const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);

                                /*
                                // reflect the angle to convert from 0 to M_PI
                                if (gradAng >= M_PI) gradAng -= M_PI;
                                // split into four bins
                                const float orient = (gradAng + M_PI) * 4 / (2 * M_PI);
                                */

                                // will be interpolated into 2 x 2 x 2 bins:
                                fv.addValue(xf, yf, orient, weightedMagnitude);


                                if (itsUseColor){

                                        /* float bw = gradmag[LUM_CHANNEL].getValInterp(orgX, orgY);
                                           float rg = gradmag[RG_CHANNEL].getValInterp(orgX, orgY);
                                           float by = gradmag[BY_CHANNEL].getValInterp(orgX, orgY); */

                                        float bw = itsLumBlur[scale].getValInterp(orgX, orgY);
                                        float rg = itsRGBlur[scale].getValInterp(orgX, orgY);
                                        float by = itsBYBlur[scale].getValInterp(orgX, orgY);


                                        Colfv.addValue(xf, yf, 0.5, bw*gaussFactor);
                                        Colfv.addValue(xf, yf, 1.5, rg*gaussFactor);
                                        Colfv.addValue(xf, yf, 2.5, by*gaussFactor);


                                        // find the max and min of the colors:
                                        if (bw > maxBW) maxBW = bw;
                                        if (bw < minBW) minBW = bw;

                                        if (rg > maxRG) maxRG = rg;
                                        if (rg < minRG) minRG = rg;

                                        if (by > maxBY) maxBY = by;
                                        if (by < minBY) minBY = by;

                                }

                        }

                // normalize, clamp, scale and convert to byte:
                std::vector<byte> oriVec;
                fv.toByteKey(oriVec);


                if (itsUseColor){

                        std::vector<byte> colVec;
                        Colfv.toByteKey(colVec, -1, true);
                        /* // put the max and min colors:
                           colVec.push_back(byte(maxBW));
                           colVec.push_back(byte(minBW));
                           colVec.push_back(byte(maxRG));
                           colVec.push_back(byte(minRG));
                           colVec.push_back(byte(maxBY));
                           colVec.push_back(byte(minBY)); */

                        const float alpha = 0.2; //1.0F/(s+1); //have alpha as a function of scale
                        float oriWeight = 1.0F * (1.0F-alpha);
                        float colWeight = 2.67F * alpha; //21.34 * alpha; //2.67F * alpha;
                        rutz::shared_ptr<Keypoint>
                                newkey(new Keypoint(oriVec, colVec, x * itsOctScale,
                                                        y * itsOctScale, sigma * itsOctScale, realangle, dogmag,
                                                        oriWeight, colWeight)); // set the weights for ori and col
                        // add to list of keys:
                        keypoints.push_back(newkey);
                        ++numkp;

                } else {

                        // create a keypoint:
                        rutz::shared_ptr<Keypoint>
                                newkey(new Keypoint(oriVec, x * itsOctScale, y * itsOctScale,
                                                        sigma * itsOctScale, realangle, dogmag));
                        // add to list of keys:
                        keypoints.push_back(newkey);
                        ++numkp;
                }

        }
        return numkp;
}
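
// Illustrative sketch (not part of the original toolkit source): the 3-point
// parabolic interpolation used above to refine an orientation-histogram peak.
// With f(-1)=leftval, f(0)=midval, f(+1)=rightval and f(x)=ax^2+bx+c, the peak
// sits at x = -b/(2a), so the refined bin is bin - 0.5*b/a, which is then
// mapped from [0, ORIENTARRAY) to [-pi, pi). Only called for strict local
// peaks, so a < 0 and the division is safe. The helper name is hypothetical.
#if 0
static float refineOrientationPeak(const int bin,
                                   const float leftval,
                                   const float midval,
                                   const float rightval)
{
        const float a = 0.5F * (leftval + rightval) - midval; // curvature
        const float b = 0.5F * (rightval - leftval);          // slope
        float realangle = float(bin) - 0.5F * b / a;          // parabola peak
        realangle *= 2.0F * M_PI / ORIENTARRAY;               // [0:36] to [0:2pi]
        realangle -= M_PI;                                     // [0:2pi] to [-pi:pi]
        return realangle;
}
#endif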


// ######################################################################
uint ScaleSpace::createKeypoints(const float x, const float y, const float s,
                const float dogmag, const ImageSet<float>& gradmag,
                const ImageSet<float>& gradorie,
                std::vector< rutz::shared_ptr<Keypoint> >& keypoints)
{
        uint numkp = 0;

        // orientation histogram:
        Histogram OV(ORIENTARRAY);

        // 1. Calculate orientation vector (radius and Gaussian window
        // computations happen inside the helper):
        calculateOrientationVector(x, y, s, gradmag, gradorie, OV);

        // 2. Create feature vector and keypoint for each significant
        // orientation peak:
        numkp = createVectorsAndKeypoints(x, y, s, dogmag, gradmag, gradorie, keypoints, OV);

        return numkp;
}

// ######################################################################
Image<float> ScaleSpace::getKeypointImage(std::vector< rutz::shared_ptr<Keypoint> >& keypoints)
{
        Image<float> img(itsLumBlur[0].getDims(), ZEROS);

        int width = itsLumBlur[0].getWidth();
        int height = itsLumBlur[0].getHeight();

        // go over the keypoints and draw a bright disk of radius s at each location:
        std::vector<rutz::shared_ptr<Keypoint> >::const_iterator k = keypoints.begin(),
                stop = keypoints.end();
        while (k != stop)
        {
                float x = (*k)->getX();
                float y = (*k)->getY();
                const float s = (*k)->getS();
                // const float o = (*k)->getO();

                if (x >= width) x = width-1;
                if (y >= height) y = height-1;
                drawDisk(img, Point2D<int>(int(x), int(y)), int(s), 255.0F);
                //img.setVal(int(x), int(y), 255.0F);
                k++;
        }

        return img;
}

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */