00001 /*!@file SIFT/ScaleSpace.C Keypoint computation for SIFT obj recognition */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: James Bonaiuto <bonaiuto@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/ScaleSpace.C $ 00035 // $Id: ScaleSpace.C 10423 2008-11-12 22:26:07Z mviswana $ 00036 // 00037 00038 #include "SIFT/ScaleSpace.H" 00039 00040 #include "Image/FilterOps.H" 00041 #include "Image/Kernels.H" 00042 #include "Image/MathOps.H" 00043 #include "Image/MatrixOps.H" 00044 #include "Image/DrawOps.H" 00045 #include "SIFT/Histogram.H" 00046 #include "SIFT/Keypoint.H" 00047 #include "SIFT/FeatureVector.H" 00048 #include "rutz/compat_cmath.h" 00049 #include "Image/fancynorm.H" 00050 00051 const int EXT = 10; //!< keep out edge width 00052 const float R_EDGE = 8.0F; //!< edge response threshold 00053 const float PEAK_THRESH = 2.0F; //!< peak below that doesn't count 00054 const int ORIENTARRAY = 36; //!< size of orientation array 00055 00056 00057 // ###################################################################### 00058 ScaleSpace::ScaleSpace(const ImageSet<float>& in, const float octscale, 00059 const int s, const float sigma, bool useColor) : 00060 itsOctScale(octscale), itsSigma(sigma), itsLumBlur(s + 3), itsRGBlur(s+3), 00061 itsBYBlur(s+3), itsDog(s + 2), itsUseColor(useColor) 00062 { 00063 ASSERT(s > 0); ASSERT(octscale > 0.0F); ASSERT(sigma > 0.0F); 00064 00065 const float k = powf(2.0f, 1.0f / s); 00066 00067 // our bottom image is just the original. We here assume that the 00068 // caller has pre-blurred it if necessary. See the constructor of 00069 // VisualObject for what we mean by that: 00070 itsLumBlur[0] = in[LUM_CHANNEL]; 00071 00072 if (itsUseColor){ 00073 itsRGBlur[0] = in[RG_CHANNEL]; 00074 itsBYBlur[0] = in[BY_CHANNEL]; 00075 } 00076 00077 // compute the additional gaussian blurred maps: 00078 for (int ss = 1; ss < s+3; ++ss) 00079 { 00080 // compute the stdev of a Gaussian we should convolve the 00081 // previous blurred image by, so as to multiply the total 00082 // effective blurring sigma by a factor k: 00083 const float std = sigma * powf(k, float(ss-1)) * sqrt(k*k - 1.0F); 00084 00085 // get a Gaussian kernel by that stdev, normalized to unit sum: 00086 Image<float> kernel = gaussian<float>(1.0F, std, itsLumBlur[0].getWidth(), 1.0F); 00087 kernel = kernel / float(sum(kernel)); 00088 00089 // do the convolution: 00090 itsLumBlur[ss] = sepFilter(itsLumBlur[ss-1], kernel, kernel, 00091 CONV_BOUNDARY_CLEAN); 00092 00093 if (itsUseColor){ 00094 itsRGBlur[ss] = sepFilter(itsRGBlur[ss-1], kernel, kernel, 00095 CONV_BOUNDARY_CLEAN); 00096 itsBYBlur[ss] = sepFilter(itsBYBlur[ss-1], kernel, kernel, 00097 CONV_BOUNDARY_CLEAN); 00098 } 00099 } 00100 00101 // compute the difference of gaussian images: 00102 for (int ss = 0; ss < s+2; ++ss) 00103 itsDog[ss] = itsLumBlur[ss+1] - itsLumBlur[ss]; 00104 00105 00106 /* 00107 //take the max normalize 00108 #define NORMTYPE VCXNORM_MAXNORM 00109 for (int ss=0; ss < s+3; ++ss){ 00110 itsLumBlur[ss] = maxNormalize(itsLumBlur[ss], 0.0F, 2.0F, NORMTYPE); 00111 itsRGBlur[ss] = maxNormalize(itsRGBlur[ss], 0.0F, 2.0F, NORMTYPE); 00112 itsBYBlur[ss] = maxNormalize(itsBYBlur[ss], 0.0F, 2.0F, NORMTYPE); 00113 }*/ 00114 } 00115 00116 // ###################################################################### 00117 ScaleSpace::~ScaleSpace() 00118 { } 00119 00120 // ###################################################################### 00121 Image<float> ScaleSpace::getTwoSigmaImage(int channel) const 00122 { 00123 //LINFO("Channel select"); 00124 switch (channel){ 00125 case LUM_CHANNEL: 00126 return itsLumBlur.getImage(itsLumBlur.size() - 3); 00127 case RG_CHANNEL: 00128 return itsRGBlur.getImage(itsRGBlur.size() - 3); 00129 case BY_CHANNEL: 00130 return itsRGBlur.getImage(itsBYBlur.size() - 3); 00131 } 00132 00133 LINFO("Invalid channel %i", channel); 00134 ASSERT(false); 00135 return itsLumBlur.getImage(itsLumBlur.size() - 3); 00136 } 00137 00138 00139 // ###################################################################### 00140 uint ScaleSpace::getNumBlurredImages() const 00141 { return itsLumBlur.size(); } 00142 00143 // ###################################################################### 00144 Image<float> ScaleSpace::getBlurredImage(const uint idx) const 00145 { return itsLumBlur.getImage(idx); } 00146 00147 // ###################################################################### 00148 uint ScaleSpace::getNumDoGImages() const 00149 { return itsDog.size(); } 00150 00151 // ###################################################################### 00152 Image<float> ScaleSpace::getDoGImage(const uint idx) const 00153 { return itsDog.getImage(idx); } 00154 00155 // ###################################################################### 00156 uint ScaleSpace::findKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints) 00157 { 00158 int w = itsLumBlur[0].getWidth(), h = itsLumBlur[0].getHeight(); 00159 Image<byte> analyzed(w, h, ZEROS); // keep track of visited locations 00160 uint numkp = 0; 00161 00162 // loop through blurred images in scale space: 00163 for (uint sc = 1; sc <= itsLumBlur.size()-3; sc++) 00164 { 00165 LDEBUG("Processing octave %.2f scale %d/%d", itsOctScale, 00166 sc, itsLumBlur.size()-3); 00167 00168 // compute magnitude and orientation of the gradient of the 00169 // blurred image for the scale of interest: 00170 ImageSet<float> gradmag(3), gradori(3); 00171 gradient(itsLumBlur[sc], gradmag[LUM_CHANNEL], gradori[LUM_CHANNEL]); 00172 00173 if (itsUseColor){ 00174 gradient(itsRGBlur[sc], gradmag[RG_CHANNEL], gradori[RG_CHANNEL]); 00175 gradient(itsBYBlur[sc], gradmag[BY_CHANNEL], gradori[BY_CHANNEL]); 00176 00177 } 00178 00179 00180 // for every pixels in image except for edges 00181 for (int x = EXT; x < w - EXT; x++) 00182 for (int y = EXT; y < h - EXT; y++) 00183 // check for maximum or minimum of DoG in scale space; 00184 // if found, trigger accurate localization and pruning 00185 // of keypoint: 00186 if (checkForMinMax(x, y, itsDog[sc-1], itsDog[sc], itsDog[sc+1])) 00187 numkp += accurateLocalizationAndPruning(x, y, sc, analyzed, 00188 gradmag, gradori, 00189 keypoints); 00190 } 00191 return numkp; 00192 } 00193 00194 // ###################################################################### 00195 00196 // Custom and highly irregular version of SIFT keypoint creation. This 00197 // was written specifically for the Lazebnik Beyond Bags-of-Features 00198 // paper. Works only for level zero. Thou hast been warned. 00199 // 00200 // For more info, see src/Neuro/GistEstimatorBeyondBoF.H and .C; also, 00201 // src/Gist/train-bbof.C. 00202 static void 00203 pixelPatchCreateKeypoint(const float x, const float y, const float sigma, 00204 const float dogmag, const Image<float>& gradmag, 00205 const Image<float>& gradorie, 00206 std::vector<rutz::shared_ptr<Keypoint> >& keypoints) 00207 { 00208 const int xi = int(x + 0.5f); 00209 const int yi = int(y + 0.5f); 00210 00211 FeatureVector fv; 00212 00213 // check this scale 00214 const int radius = int(5.0f * sigma + 0.5f); // NOTE: Lowe uses radius=8? 00215 const float gausssig = float(radius); // 1/2 width of descript window 00216 const float gaussfac = -0.5f/(gausssig * gausssig); 00217 00218 // Scan a window of diameter 2*radius+1 around the point of 00219 // interest, and we will cumulate local samples into a 4x4 grid 00220 // of bins, with interpolation. NOTE: rx and ry loop over a 00221 // square that is assumed centered around the point of interest. 00222 for (int ry = -radius; ry < radius; ry++) 00223 for (int rx = -radius; rx < radius; rx++) 00224 { 00225 // get the coords in the image frame of reference: 00226 const float orgX = rx + float(xi); 00227 const float orgY = ry + float(yi); 00228 00229 if (! gradmag.coordsOk(orgX, orgY)) // outside image 00230 continue; // forget this coordinate 00231 00232 // find the fractional coords of the corresponding bin 00233 // (we subdivide our window into a 4x4 grid of bins): 00234 const float xf = 2.0f + 2.0f * float(rx)/float(radius); 00235 const float yf = 2.0f + 2.0f * float(ry)/float(radius); 00236 00237 // find the Gaussian weight from distance to center and 00238 // get weighted gradient magnitude: 00239 const float gaussFactor = expf((rx*rx + ry*ry) * gaussfac); 00240 const float weightedMagnitude = 00241 gaussFactor * gradmag.getValInterp(orgX, orgY); 00242 00243 // get the gradient orientation relative to the keypoint 00244 // orientation and scale it for 8 orientation bins: 00245 float gradAng = gradorie.getValInterp(orgX, orgY); 00246 gradAng=fmod(gradAng, 2*M_PI); //bring the range from 0 to M_PI 00247 00248 //convert from -M_PI to M_PI 00249 if (gradAng < 0.0) 00250 gradAng += 2*M_PI; //convert to -M_PI to M_PI 00251 if (gradAng >= M_PI) 00252 gradAng -= 2*M_PI; 00253 //split to eight bins 00254 const float orient = (gradAng + M_PI) * 8 / (2 * M_PI); 00255 00256 // will be interpolated into 2 x 2 x 2 bins: 00257 fv.addValue(xf, yf, orient, weightedMagnitude); 00258 } 00259 00260 // normalize, clamp, scale and convert to byte: 00261 std::vector<byte> oriVec; 00262 fv.toByteKey(oriVec); 00263 00264 rutz::shared_ptr<Keypoint> 00265 newkey(new Keypoint(oriVec, x, y, sigma, 0, dogmag)); 00266 keypoints.push_back(newkey); 00267 } 00268 00269 uint 00270 ScaleSpace:: 00271 getGridKeypoints(std::vector<rutz::shared_ptr<Keypoint> >& keypoints) 00272 { 00273 // compute magnitude and orientation of the gradient of the 00274 // blurred image for the scale of interest: 00275 Image<float> gradmag, gradori; 00276 gradient(itsLumBlur[0], gradmag, gradori); 00277 00278 int w = itsLumBlur[0].getWidth(), 00279 h = itsLumBlur[0].getHeight(); 00280 uint numkp = 0; 00281 const int radius = int(5.0f * itsSigma + 0.5f); 00282 for (int y = radius; y < h - radius + 1; y += radius) 00283 for (int x = radius; x < w - radius + 1; x += radius) { 00284 pixelPatchCreateKeypoint(x, y, itsSigma, 1, 00285 gradmag, gradori, keypoints); 00286 ++numkp ; 00287 } 00288 00289 return numkp ; 00290 } 00291 00292 // ###################################################################### 00293 uint ScaleSpace:: 00294 accurateLocalizationAndPruning(const int x, const int y, const int s, 00295 Image<byte>& analyzed, 00296 const ImageSet<float>& gradmag, 00297 const ImageSet<float>& gradori, 00298 std::vector< rutz::shared_ptr<Keypoint> >& keypoints) 00299 { 00300 // Interactive 3D interpolation (x direction, y direction, and scale 00301 // direction). We do a 3d fit of this extremum. The resulting dpos 00302 // contains the accurate fractional location of the extremum 00303 // relative to (new_x, new_y, s), and dDog is the derivative of DoG 00304 // at that point: 00305 int new_x = x, new_y = y, new_s = s; 00306 Image<float> dDog, dpos; float dx2, dy2, dxdy; 00307 dpos = fit3D(new_x, new_y, s, dDog, dx2, dy2, dxdy); 00308 00309 // give up on some overflow cases: 00310 if (fabs(dpos.getVal(0)) > 1.5F) return 0; 00311 if (fabs(dpos.getVal(1)) > 1.5F) return 0; 00312 if (fabs(dpos.getVal(2)) > 1.5F) return 0; 00313 00314 // If dpos is larger than 0.5 in x or y direction, we should move 00315 // our keypoint to a better location and try our 3D fit again: 00316 bool moved = false; 00317 if (dpos.getVal(0) > 0.5) { ++new_x; moved = true; } 00318 else if (dpos.getVal(0) < -0.5) { --new_x; moved = true; } 00319 00320 if (dpos.getVal(1) > 0.5) { ++new_y; moved = true; } 00321 else if (dpos.getVal(1) < -0.5) { --new_y; moved = true; } 00322 00323 if (dpos.getVal(2) > 0.5) { 00324 if (new_s >= int(itsDog.size()) - 2) return 0; // can't go higher in scale 00325 ++new_s; moved = true; 00326 } else if (dpos.getVal(2) < -0.5) { 00327 if (new_s <= 1) return 0; // can't go lower in scale 00328 --new_s; moved = true; 00329 } 00330 00331 // if we moved, recompute the 3D fit: 00332 if (moved) 00333 { 00334 dpos = fit3D(new_x, new_y, new_s, dDog, dx2, dy2, dxdy); 00335 00336 // give up if we can't get close: 00337 if (fabs(dpos.getVal(0)) > 0.5F) return 0; 00338 if (fabs(dpos.getVal(1)) > 0.5F) return 0; 00339 if (fabs(dpos.getVal(2)) > 0.5F) return 0; 00340 } 00341 00342 // if already analyzed then quit, otherwise mark as analyzed: 00343 if (analyzed.getVal(new_x, new_y)) return 0; 00344 analyzed.setVal(new_x, new_y, 255); 00345 00346 // here are our new fractional coordinates: 00347 const float xf = float(new_x) + float(dpos.getVal(0)); 00348 const float yf = float(new_y) + float(dpos.getVal(1)); 00349 const float sf = float(new_s) + float(dpos.getVal(2)); 00350 00351 // calculate new peak height, using linear approximation: 00352 const float peak_height = 00353 itsDog[new_s].getVal(new_x, new_y) + 0.5F * dotprod(dpos, dDog); 00354 00355 // forget about that keypoint if the DoG value is too small 00356 // (low-contrast keypoint): 00357 if (fabsf(peak_height) < PEAK_THRESH ) return 0; 00358 00359 // pruning big edge response (this will be used as denominator in 00360 // the following test): 00361 if (fabs(dx2 * dy2 - dxdy * dxdy) < 1.0e-5F) return 0; 00362 00363 // calculate edge test, to eliminate keypoints along strong edges: 00364 if ((dx2+dy2)*(dx2+dy2) / fabs(dx2 * dy2 - dxdy * dxdy) >= 00365 (R_EDGE + 1.0F) * (R_EDGE + 1.0F) / R_EDGE) return 0; 00366 00367 // if we have reached this point, we got a stable keypoint! Create 00368 // its feature vector: 00369 return createKeypoints(xf, yf, sf, peak_height, gradmag, gradori, keypoints); 00370 } 00371 00372 // ###################################################################### 00373 bool ScaleSpace::checkForMinMax(const int x, const int y, 00374 const Image<float>& im0, 00375 const Image<float>& im1, 00376 const Image<float>& im2) const 00377 { 00378 // get value: 00379 float val = im1.getVal(x, y); 00380 00381 // if below peak threshold just give up: 00382 if (fabsf(val) < PEAK_THRESH) return false; 00383 00384 // verify for max or min: 00385 if (val < im1.getVal(x-1, y)) 00386 { 00387 // ok, then let's check for a minimum: 00388 if (val >= im1.getVal(x-1, y-1)) return false; 00389 if (val >= im1.getVal(x-1, y+1)) return false; 00390 if (val >= im1.getVal(x , y-1)) return false; 00391 if (val >= im1.getVal(x , y+1)) return false; 00392 if (val >= im1.getVal(x+1, y-1)) return false; 00393 if (val >= im1.getVal(x+1, y )) return false; 00394 if (val >= im1.getVal(x+1, y+1)) return false; 00395 00396 // check for min level -1: 00397 if (val >= im0.getVal(x-1, y-1)) return false; 00398 if (val >= im0.getVal(x-1, y )) return false; 00399 if (val >= im0.getVal(x-1, y+1)) return false; 00400 if (val >= im0.getVal(x , y-1)) return false; 00401 if (val >= im0.getVal(x , y )) return false; 00402 if (val >= im0.getVal(x , y+1)) return false; 00403 if (val >= im0.getVal(x+1, y-1)) return false; 00404 if (val >= im0.getVal(x+1, y )) return false; 00405 if (val >= im0.getVal(x+1, y+1)) return false; 00406 00407 // check for min level +1: 00408 if (val >= im2.getVal(x-1, y-1)) return false; 00409 if (val >= im2.getVal(x-1, y )) return false; 00410 if (val >= im2.getVal(x-1, y+1)) return false; 00411 if (val >= im2.getVal(x , y-1)) return false; 00412 if (val >= im2.getVal(x , y )) return false; 00413 if (val >= im2.getVal(x , y+1)) return false; 00414 if (val >= im2.getVal(x+1, y-1)) return false; 00415 if (val >= im2.getVal(x+1, y )) return false; 00416 if (val >= im2.getVal(x+1, y+1)) return false; 00417 00418 return true; 00419 } 00420 else if (val > im1.getVal(x-1, y)) 00421 { 00422 // check for maximum: 00423 if (val <= im1.getVal(x-1, y-1)) return false; 00424 if (val <= im1.getVal(x-1, y+1)) return false; 00425 if (val <= im1.getVal(x , y-1)) return false; 00426 if (val <= im1.getVal(x , y+1)) return false; 00427 if (val <= im1.getVal(x+1, y-1)) return false; 00428 if (val <= im1.getVal(x+1, y )) return false; 00429 if (val <= im1.getVal(x+1, y+1)) return false; 00430 00431 // check for max level -1: 00432 if (val <= im0.getVal(x-1, y-1)) return false; 00433 if (val <= im0.getVal(x-1, y )) return false; 00434 if (val <= im0.getVal(x-1, y+1)) return false; 00435 if (val <= im0.getVal(x , y-1)) return false; 00436 if (val <= im0.getVal(x , y )) return false; 00437 if (val <= im0.getVal(x , y+1)) return false; 00438 if (val <= im0.getVal(x+1, y-1)) return false; 00439 if (val <= im0.getVal(x+1, y )) return false; 00440 if (val <= im0.getVal(x+1, y+1)) return false; 00441 00442 // check for max level +1: 00443 if (val <= im2.getVal(x-1, y-1)) return false; 00444 if (val <= im2.getVal(x-1, y )) return false; 00445 if (val <= im2.getVal(x-1, y+1)) return false; 00446 if (val <= im2.getVal(x , y-1)) return false; 00447 if (val <= im2.getVal(x , y )) return false; 00448 if (val <= im2.getVal(x , y+1)) return false; 00449 if (val <= im2.getVal(x+1, y-1)) return false; 00450 if (val <= im2.getVal(x+1, y )) return false; 00451 if (val <= im2.getVal(x+1, y+1)) return false; 00452 00453 return true; 00454 } 00455 00456 return false; 00457 } 00458 00459 // ###################################################################### 00460 // 00461 // Fit in 3D 00462 // in the (X, Y, S) space, fit the peak, to find it's best extrenum 00463 // return solution and dDog as first derivative of Dog 00464 // 00465 Image<float> ScaleSpace::fit3D(const int x, const int y, const int s, 00466 Image<float>& dDog, float& dX2, 00467 float& dY2, float& dXdY) const 00468 { 00469 Image<float> below = itsDog[s - 1]; 00470 Image<float> cur = itsDog[s]; 00471 Image<float> above = itsDog[s + 1]; 00472 00473 // standard approximations of first derivatives of the DoG: 00474 float dX = 0.5F * (cur.getVal(x+1, y) - cur.getVal(x-1, y)); 00475 float dY = 0.5F * (cur.getVal(x, y+1) - cur.getVal(x, y-1)); 00476 float dS = 0.5F * (above.getVal(x, y) - below.getVal(x, y)); 00477 00478 // standard approximations of second derivatives of the DoG: 00479 dX2 = cur.getVal(x-1, y) - 2.0F*cur.getVal(x, y) + cur.getVal(x+1, y); 00480 dY2 = cur.getVal(x, y-1) - 2.0F*cur.getVal(x, y) + cur.getVal(x, y+1); 00481 float dS2 = below.getVal(x, y) - 2.0F*cur.getVal(x, y) + above.getVal(x, y); 00482 00483 // standard approximation of crossed derivatives of the DoG: 00484 dXdY = 0.25F * (cur.getVal(x+1, y+1) - cur.getVal(x-1, y+1)) - 00485 0.25F * (cur.getVal(x+1, y-1) - cur.getVal(x-1, y-1)); 00486 float dSdX = 0.25F * (above.getVal(x+1, y) - above.getVal(x-1, y)) - 00487 0.25F * (below.getVal(x+1, y) - below.getVal(x-1, y)); 00488 float dSdY = 0.25F * (above.getVal(x, y+1) - above.getVal(x, y-1)) - 00489 0.25F * (below.getVal(x, y+1) - below.getVal(x, y-1)); 00490 00491 // matrix to solve is: 00492 // dX [ dX2 dXdY dXdS ] 00493 // dY = [ V ] [ dXdY dY2 dYdS ] 00494 // dS [ dXdS dYdS dS2 ] 00495 dDog.resize(1, 3); 00496 dDog.setVal(0, -dX); dDog.setVal(1, -dY); dDog.setVal(2, -dS); 00497 00498 Image<float> mat(3, 3, NO_INIT); 00499 mat.setVal(0, 0, dX2); mat.setVal(1, 1, dY2); mat.setVal(2, 2, dS2); 00500 mat.setVal(1, 0, dXdY); mat.setVal(0, 1, dXdY); 00501 mat.setVal(2, 0, dSdX); mat.setVal(0, 2, dSdX); 00502 mat.setVal(2, 1, dSdY); mat.setVal(1, 2, dSdY); 00503 00504 Image<float> result; 00505 try 00506 { 00507 mat = matrixInv(mat); 00508 result = matrixMult(mat, dDog); 00509 } 00510 catch (SingularMatrixException& e) 00511 { 00512 LDEBUG("Couldn't invert matrix -- RETURNING [ 2.0 2.0 2.0 ]^T"); 00513 result.resize(1, 3); 00514 result.setVal(0, 0, 2.0); 00515 result.setVal(0, 1, 2.0); 00516 result.setVal(0, 2, 2.0); 00517 } 00518 return result; 00519 } 00520 00521 00522 // ###################################################################### 00523 00524 void ScaleSpace::calculateOrientationVector(const float x, const float y, const float s, 00525 const ImageSet<float>& gradmag, const ImageSet<float>& gradorie, Histogram& OV) { 00526 00527 00528 // compute the effective blurring sigma corresponding to the 00529 // fractional scale s: 00530 const float sigma = itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3)); 00531 00532 const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig); 00533 const int dimX = gradmag[LUM_CHANNEL].getWidth(), dimY = gradmag[LUM_CHANNEL].getHeight(); 00534 00535 const int xi = int(x + 0.5f); 00536 const int yi = int(y + 0.5f); 00537 00538 const int rad = int(3.0F * sig); 00539 const int rad2 = rad * rad; 00540 00541 00542 // search bounds: 00543 int starty = yi - rad; if (starty < 0) starty = 0; 00544 int stopy = yi + rad; if (stopy >= dimY) stopy = dimY-1; 00545 00546 // 1. Calculate orientation vector 00547 for (int ind_y = starty; ind_y <= stopy; ind_y ++) 00548 { 00549 // given that y, get the corresponding range of x values that 00550 // lie within the disk (and remain within the image): 00551 const int yoff = ind_y - yi; 00552 const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F); 00553 int startx = xi - bound; if (startx < 0) startx = 0; 00554 int stopx = xi + bound; if (stopx >= dimX) stopx = dimX-1; 00555 00556 for (int ind_x = startx; ind_x <= stopx; ind_x ++) 00557 { 00558 const float dx = float(ind_x) - x, dy = float(ind_y) - y; 00559 const float distSq = dx * dx + dy * dy; 00560 00561 // get gradient: 00562 const float gradVal = gradmag[LUM_CHANNEL].getVal(ind_x, ind_y); 00563 00564 // compute the gaussian weight for this histogram entry: 00565 const float gaussianWeight = expf(distSq * inv2sig2); 00566 00567 // add this orientation to the histogram 00568 // [-pi ; pi] -> [0 ; 2pi] 00569 float angle = gradorie[LUM_CHANNEL].getVal(ind_x, ind_y) + M_PI; 00570 00571 // [0 ; 2pi] -> [0 ; 36] 00572 angle = 0.5F * angle * ORIENTARRAY / M_PI; 00573 while (angle < 0.0F) angle += ORIENTARRAY; 00574 while (angle >= ORIENTARRAY) angle -= ORIENTARRAY; 00575 00576 OV.addValueInterp(angle, gaussianWeight * gradVal); 00577 } 00578 } 00579 00580 00581 // smooth the orientation histogram 3 times: 00582 for (int i = 0; i < 3; i++) OV.smooth(); 00583 } 00584 00585 // ###################################################################### 00586 00587 00588 uint ScaleSpace::createVectorsAndKeypoints(const float x, const float y, const float s, 00589 const float dogmag, const ImageSet<float>& gradmag, 00590 const ImageSet<float>& gradorie, std::vector < rutz::shared_ptr<Keypoint> >& keypoints, Histogram& OV) 00591 { 00592 00593 const float sigma = itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3)); 00594 00595 // find the max in the histogram: 00596 float maxPeakValue = OV.findMax(); 00597 00598 const int xi = int(x + 0.5f); 00599 const int yi = int(y + 0.5f); 00600 00601 uint numkp = 0; 00602 00603 // 2. Create feature vector and keypoint for each significant 00604 // orientation peak: 00605 for (int bin = 0; bin < ORIENTARRAY; bin++) 00606 { 00607 // consider the peak centered around 'bin': 00608 const float midval = OV.getValue(bin); 00609 00610 // if current value much smaller than global peak, forget it: 00611 if (midval < 0.8F * maxPeakValue) continue; 00612 00613 // get value to the left of current value 00614 const float leftval = OV.getValue((bin == 0) ? ORIENTARRAY-1 : bin-1); 00615 00616 // get value to the right of current value 00617 const float rightval = OV.getValue((bin == ORIENTARRAY-1) ? 0 : bin+1); 00618 00619 // only consider local peaks: 00620 if (leftval >= midval) continue; 00621 if (rightval >= midval) continue; 00622 00623 // interpolate the values to get the orientation of the peak: 00624 // with f(x) = ax^2 + bx + c 00625 // f(-1) = x0 = leftval 00626 // f( 0) = x1 = midval 00627 // f(+1) = x2 = rightval 00628 // => a = (x0+x2)/2 - x1 00629 // b = (x2-x0)/2 00630 // c = x1 00631 // f'(x) = 0 => x = -b/2a 00632 const float a = 0.5f * (leftval + rightval) - midval; 00633 const float b = 0.5f * (rightval - leftval); 00634 float realangle = float(bin) - 0.5F * b / a; 00635 00636 realangle *= 2.0F * M_PI / ORIENTARRAY; // [0:36] to [0:2pi] 00637 realangle -= M_PI; // [0:2pi] to [-pi:pi] 00638 00639 // ############ Create keypoint: 00640 00641 // compute the feature vector: 00642 FeatureVector fv; 00643 FeatureVector Colfv(4, 4, 3, false); //create 4x4x3 bids with no wrap 00644 00645 const float sinAngle = sin(realangle), cosAngle = cos(realangle); 00646 00647 // check this scale 00648 const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8? 00649 const float gausssig = float(radius); // 1/2 width of descript window 00650 const float gaussfac = - 0.5F / (gausssig * gausssig); 00651 00652 00653 // Scan a window of diameter 2*radius+1 around the point of 00654 // interest, and we will cumulate local samples into a 4x4 grid 00655 // of bins, with interpolation. NOTE: rx and ry loop over a 00656 // square that is assumed centered around the point of interest 00657 // and rotated to the gradient orientation (realangle): 00658 00659 int scale = abs(int(s)); 00660 scale = scale > 5 ? 5 : scale; 00661 00662 float maxRG=-9999, minRG=9999, maxBY=-9999, minBY=9999, maxBW=-9999, minBW=9999; 00663 00664 for (int ry = -radius; ry <= radius; ry++) 00665 for (int rx = -radius; rx <= radius; rx++) 00666 { 00667 // rotate the point: 00668 const float newX = rx * cosAngle - ry * sinAngle; 00669 const float newY = rx * sinAngle + ry * cosAngle; 00670 00671 // get the coords in the image frame of reference: 00672 const float orgX = newX + float(xi); 00673 const float orgY = newY + float(yi); 00674 00675 // if outside the image, forget it: 00676 if (gradmag[LUM_CHANNEL].coordsOk(orgX, orgY) == false) continue; 00677 00678 // find the fractional coords of the corresponding bin 00679 // (we subdivide our window into a 4x4 grid of bins): 00680 const float xf = 2.0F + 2.0F * float(rx) / float(radius); 00681 const float yf = 2.0F + 2.0F * float(ry) / float(radius); 00682 00683 00684 // find the Gaussian weight from distance to center and 00685 // get weighted gradient magnitude: 00686 const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac); 00687 const float weightedMagnitude = 00688 gaussFactor * gradmag[LUM_CHANNEL].getValInterp(orgX, orgY); 00689 00690 // get the gradient orientation relative to the keypoint 00691 // orientation and scale it for 8 orientation bins: 00692 float gradAng = gradorie[LUM_CHANNEL].getValInterp(orgX, orgY) - realangle; 00693 00694 gradAng=fmod(gradAng, 2*M_PI); //bring the range from 0 to M_PI 00695 00696 //convert from -M_PI to M_PI 00697 if (gradAng < 0.0) gradAng+=2*M_PI; //convert to -M_PI to M_PI 00698 if (gradAng >= M_PI) gradAng-=2*M_PI; 00699 //split to eight bins 00700 const float orient = (gradAng + M_PI) * 8 / (2 * M_PI); 00701 00702 /* 00703 //reflect the angle to convert from 0 to M_PI 00704 if (gradAng >= M_PI) gradAng-=M_PI; 00705 //split to four bins 00706 const float orient = (gradAng + M_PI) * 4 / (2 * M_PI); 00707 */ 00708 00709 // will be interpolated into 2 x 2 x 2 bins: 00710 fv.addValue(xf, yf, orient, weightedMagnitude); 00711 00712 00713 if (itsUseColor){ 00714 00715 /* float bw = gradmag[LUM_CHANNEL].getValInterp(orgX, orgY); 00716 float rg = gradmag[RG_CHANNEL].getValInterp(orgX, orgY); 00717 float by = gradmag[BY_CHANNEL].getValInterp(orgX, orgY); */ 00718 00719 float bw = itsLumBlur[scale].getValInterp(orgX, orgY); 00720 float rg = itsRGBlur[scale].getValInterp(orgX, orgY); 00721 float by = itsBYBlur[scale].getValInterp(orgX, orgY); 00722 00723 00724 Colfv.addValue(xf, yf, 0.5, bw*gaussFactor); 00725 Colfv.addValue(xf, yf, 1.5, rg*gaussFactor); 00726 Colfv.addValue(xf, yf, 2.5, by*gaussFactor); 00727 00728 00729 //find the max and min of the colors 00730 if (bw > maxBW) maxBW = bw; 00731 if (bw < minBW) minBW = bw; 00732 00733 if (rg > maxRG) maxRG = rg; 00734 if (rg < minRG) minRG = rg; 00735 00736 if (by > maxBY) maxBY = by; 00737 if (by < minBY) minBY = by; 00738 00739 00740 } 00741 00742 00743 } 00744 00745 // normalize, clamp, scale and convert to byte: 00746 std::vector<byte> oriVec; 00747 fv.toByteKey(oriVec); 00748 00749 00750 if (itsUseColor){ 00751 00752 std::vector<byte> colVec; 00753 Colfv.toByteKey(colVec, -1, true); 00754 /*//put the max and min colors 00755 colVec.push_back(byte(maxBW)); 00756 colVec.push_back(byte(minBW)); 00757 colVec.push_back(byte(maxRG)); 00758 colVec.push_back(byte(minRG)); 00759 colVec.push_back(byte(maxBY)); 00760 colVec.push_back(byte(minBY));*/ 00761 00762 const float alpha = 0.2; //1.0F/(s+1); //have alpha as a function of scale 00763 float oriWeight = 1.0F * (1.0F-alpha); 00764 float colWeight = 2.67F * alpha; //21.34 * alpha; //2.67F * alpha; 00765 rutz::shared_ptr<Keypoint> 00766 newkey(new Keypoint(oriVec, colVec, x * itsOctScale, 00767 y * itsOctScale, sigma * itsOctScale, realangle, dogmag, 00768 oriWeight, colWeight)); //set the weights for the ori and col 00769 // add to list of keys: 00770 keypoints.push_back(newkey); 00771 ++ numkp; 00772 00773 } else { 00774 00775 00776 // create a keypoint: 00777 rutz::shared_ptr<Keypoint> 00778 newkey(new Keypoint(oriVec, x * itsOctScale, y * itsOctScale, 00779 sigma * itsOctScale, realangle, dogmag)); 00780 // add to list of keys: 00781 keypoints.push_back(newkey); 00782 ++ numkp; 00783 } 00784 00785 } 00786 return numkp; 00787 } 00788 00789 00790 // ###################################################################### 00791 uint ScaleSpace::createKeypoints(const float x, const float y, const float s, 00792 const float dogmag, const ImageSet<float>& gradmag, 00793 const ImageSet<float>& gradorie, 00794 std::vector< rutz::shared_ptr<Keypoint> >& keypoints) 00795 { 00796 uint numkp = 0; 00797 00798 00799 // orientation histogram: 00800 Histogram OV(ORIENTARRAY); 00801 00802 // radius and gaussian window calculus: 00803 00804 00805 // 1. Calculate orientation vector 00806 calculateOrientationVector(x, y, s, gradmag, gradorie, OV); 00807 00808 00809 // 2. Create feature vector and keypoint for each significant 00810 // orientation peak: 00811 numkp = createVectorsAndKeypoints(x, y, s, dogmag, gradmag, gradorie, keypoints, OV); 00812 00813 00814 return numkp; 00815 } 00816 00817 // ###################################################################### 00818 Image<float> ScaleSpace::getKeypointImage(std::vector< rutz::shared_ptr<Keypoint> >& keypoints) 00819 { 00820 Image<float> img(itsLumBlur[0].getDims(), ZEROS); 00821 00822 int width = itsLumBlur[0].getWidth(); 00823 int height = itsLumBlur[0].getHeight(); 00824 00825 //go over keypoints and set that value to 255 00826 std::vector<rutz::shared_ptr<Keypoint> >::const_iterator k = keypoints.begin(), 00827 stop = keypoints.end(); 00828 while(k != stop) 00829 { 00830 float x = (*k)->getX(); 00831 float y = (*k)->getY(); 00832 const float s = (*k)->getS(); 00833 // const float o = (*k)->getO(); 00834 00835 if (x > width) x = width-1; 00836 if (y > height) y = height-1; 00837 drawDisk(img, Point2D<int>(int(x), int(y)), int(s), 255.0F); 00838 //img.setVal(int(x), int(y), 255.0F); 00839 k++; 00840 } 00841 00842 return img; 00843 00844 } 00845 00846 // ###################################################################### 00847 /* So things look consistent in everyone's emacs... */ 00848 /* Local Variables: */ 00849 /* indent-tabs-mode: nil */ 00850 /* End: */