/*!@file SIFT/ScaleSpace.H ScaleSpace computation for SIFT obj recognition */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: James Bonaiuto <bonaiuto@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/ScaleSpace.H $
// $Id: ScaleSpace.H 10423 2008-11-12 22:26:07Z mviswana $
//

#ifndef SCALESPACE_H_DEFINED
#define SCALESPACE_H_DEFINED

#include "Image/Image.H"
#include "Image/ImageSet.H"
#include "SIFT/Histogram.H"
#include "rutz/shared_ptr.h"

#include <vector>

class Keypoint;


//! ScaleSpace is like a Gaussian pyramid but with several images per level
/*! ScaleSpace is a variation of a Gaussian pyramid, where we have
    several images convolved by increasingly large Gaussian filters for
    each level (pixel size). So, while in the standard dyadic Gaussian
    pyramid we convolve an image at a given resolution, then decimate it
    by a factor of 2 to yield the next lower resolution, here we convolve
    at each resolution several times with increasingly large Gaussians
    and decimate only once every several images. This ScaleSpace class
    represents the several convolved images that are obtained for one
    resolution (one so-called octave). Additionally, ScaleSpace provides
    machinery for SIFT keypoint extraction. In the SIFT algorithm we use
    several ScaleSpace objects for several image resolutions.
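    For concreteness, with the default s = 3 scales per octave, each
    successive image gains blur so that its total effective sigma grows
    by a factor k = 2^(1/3) ~= 1.26 from one image to the next; after
    the s-th image the effective blur has exactly doubled (k^s = 2),
    which is what ends the octave and triggers the decimation to the
    next ScaleSpace.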

    Some of the code here is based on the Hugin panorama software - see
    http://hugin.sourceforge.net - but substantial modifications have
    been made, especially by carefully going over David Lowe's IJCV 2004
    paper. Some of the code also comes from http://autopano.kolor.com/
    but substantial debugging has also been done. */
class ScaleSpace
{
public:
  // Channel definitions
  enum ChannlesDef { LUM_CHANNEL = 0, RG_CHANNEL = 1, BY_CHANNEL = 2 };

  //! Constructor
  /*! Take an input image and create a bunch of Gaussian-convolved
      versions of it as well as a bunch of DoG versions of it. We
      obtain the following images, given 's' images per octave and a
      baseline Gaussian sigma of 'sigma'. In all that follows, [*]
      represents the convolution operator.

      We want that, once we have convolved the image 's' times, we end
      up with an image blurred by 2 * sigma. It is well known that
      convolving several times by Gaussians is equivalent to convolving
      once by a single Gaussian whose variance (sigma^2) is the sum of
      all the variances of the other Gaussians. It is easy to show that
      we then want to convolve each image (n >= 1) by a Gaussian of
      stdev std(n) = sigma(n-1) * sqrt(k*k - 1.0), where k = 2^(1/s) and
      sigma(n) = sigma * k^n, which means that each time we multiply the
      total effective sigma by k. So, in summary, for every n >= 1,
      std(n) = sigma * k^(n-1) * sqrt(k*k - 1.0).

      For our internal Gaussian imageset, we get s+3 images; the reason
      for that is that we want to have all the possible DoGs for that
      octave:

      blur[ 0]  = in        [*] G(sigma)
      blur[ 1]  = blur[0]   [*] G(std(1))   = in [*] G(k*sigma)
      blur[ 2]  = blur[1]   [*] G(std(2))   = in [*] G(k^2*sigma)
      ...
      blur[ s]  = blur[s-1] [*] G(std(s))   = in [*] G(k^s*sigma) = in [*] G(2*sigma)
      blur[s+1] = blur[s]   [*] G(std(s+1)) = in [*] G(2*k*sigma)
      blur[s+2] = blur[s+1] [*] G(std(s+2)) = in [*] G(2*k^2*sigma)

      For our internal DoG imageset, we just take differences between
      two adjacent images in the blurred imageset, yielding s+2 images
      such that:

      dog[ 0]  = in [*] (G(k*sigma)     - G(sigma))
      dog[ 1]  = in [*] (G(k^2*sigma)   - G(k*sigma))
      dog[ 2]  = in [*] (G(k^3*sigma)   - G(k^2*sigma))
      ...
      dog[s-1] = in [*] (G(2*sigma)     - G(k^(s-1)*sigma))
      dog[ s]  = in [*] (G(2*k*sigma)   - G(2*sigma))
      dog[s+1] = in [*] (G(2*k^2*sigma) - G(2*k*sigma))

      The reason why we need to compute dog[s+1] is that to find
      keypoints we will look for extrema in the scalespace, which
      requires comparing the dog value at a given level (1 .. s) to the
      dog values at levels immediately above and immediately below.

      To chain ScaleSpaces, you should construct a first one from your
      original image, after you have ensured that it has a blur of
      'sigma'. Then do a getTwoSigmaImage(), decimate the result using
      decXY(), and construct your next ScaleSpace directly from
      that. Note that when you chain ScaleSpace objects, the effective
      blurring accelerates: the first ScaleSpace goes from sigma to
      2.0*sigma, the second from 2.0*sigma to 4.0*sigma, and so on.
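      For illustration, a minimal chaining sketch along those lines,
      assuming a single (luminance) channel and assuming that decXY()
      (the decimation helper mentioned above) decimates by a factor of
      2 by default; exact signatures may differ:

      @code
      ImageSet<float> in1(1);
      in1[0] = img;                // 'img' assumed already blurred to sigma
      ScaleSpace ss1(in1, 1.0F);   // first octave, octscale = 1
      Image<float> two = ss1.getTwoSigmaImage(ScaleSpace::LUM_CHANNEL);
      ImageSet<float> in2(1);
      in2[0] = decXY(two);         // halved size; blur is back to sigma
      ScaleSpace ss2(in2, 2.0F);   // next octave, octscale doubles
      @endcode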
      @param in the input image.
      @param octscale baseline octave scale for this scalespace compared
      to the size of our original image. For example, if 'in' has been
      decimated by a factor of 4 horizontally and vertically relative to
      the original input image, octscale should be 4.0. This is used
      later on when assembling keypoint descriptors, so that we remember
      to scale the coordinates of the keypoints from our coordinates
      (computed in a possibly reduced input image) back to the
      coordinates of the original input image.
      @param s number of Gaussian scales per octave.
      @param sigma baseline Gaussian sigma to use. */
  ScaleSpace(const ImageSet<float>& in, const float octscale,
             const int s = 3, const float sigma = 1.6F, bool useColor = false);

  //! Destructor
  ~ScaleSpace();

  //! Get the image blurred by 2*sigma
  /*! This image can then be downscaled and fed as input to the next
      ScaleSpace. */
  Image<float> getTwoSigmaImage(int channel) const;

  //! Find the keypoints in the previously provided picture
  /*! The ScaleSpace will be searched for good SIFT keypoints. Any
      keypoint found will be pushed to the back of the provided
      vector. Note that the vector is not cleared, so if you pass a
      non-empty vector, new keypoints will just be added to it. Returns
      the number of keypoints added to the vector of keypoints. */
  uint findKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

  //! Get the keypoints in the previously provided picture
  /*! The ScaleSpace will extract keypoints in a grid setup. Those
      keypoints will be pushed to the back of the provided
      vector. Note that the vector is not cleared, so if you pass a
      non-empty vector, new keypoints will just be added to it. Returns
      the number of keypoints added to the vector of keypoints. */
  uint getGridKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

  //! Get number of blurred images
  uint getNumBlurredImages() const;

  //! Get a blurred image
  Image<float> getBlurredImage(const uint idx) const;

  //! Get number of DoG images
  uint getNumDoGImages() const;

  //! Get a DoG image
  Image<float> getDoGImage(const uint idx) const;

  //! Get a keypoint image for saliency submap
  Image<float> getKeypointImage(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

private:
  float itsOctScale;          // scale factor relative to original image
  float itsSigma;             // our baseline Gaussian sigma
  ImageSet<float> itsLumBlur; // Gaussian-blurred pictures for lum
  ImageSet<float> itsRGBlur;  // Gaussian-blurred pictures for rg
  ImageSet<float> itsBYBlur;  // Gaussian-blurred pictures for by
  ImageSet<float> itsDog;     // difference-of-Gaussian pictures
  bool itsUseColor;           // should we add color?
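  // ######################################################################
  // Note on the localization helpers below: the sub-pixel refinement
  // (fit3D() and accurateLocalizationAndPruning()) follows section 4 of
  // Lowe's IJCV 2004 paper. Around a discrete DoG extremum at (x, y, s),
  // fit a 3D quadratic and solve H * offset = -g, where g and H are the
  // gradient and Hessian of the DoG estimated by finite differences. An
  // illustrative sketch of the central differences involved, with
  // im0/im1/im2 as the three adjacent DoG planes (the members below may
  // differ in details):
  //
  //   float dX  = 0.5F * (im1.getVal(x+1, y) - im1.getVal(x-1, y)); // d/dx
  //   float dY  = 0.5F * (im1.getVal(x, y+1) - im1.getVal(x, y-1)); // d/dy
  //   float dS  = 0.5F * (im2.getVal(x, y)   - im0.getVal(x, y));   // d/ds
  //   float dX2 = im1.getVal(x+1, y) - 2.0F * im1.getVal(x, y)
  //             + im1.getVal(x-1, y);                               // d2/dx2
  //
  // If any component of the solved offset exceeds 0.5, the true extremum
  // lies closer to a neighboring sample; low-contrast points and edge-like
  // points (large ratio of principal curvatures) are then pruned.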

  // Returns true if the value at (x, y) in im1 is a max or min within
  // its 3x3x3 neighborhood spanning im0, im1 and im2
  bool checkForMinMax(const int x, const int y, const Image<float>& im0,
                      const Image<float>& im1, const Image<float>& im2) const;

  // Fits a paraboloid in (X, Y, S) space: 3D Hessian matrix inversion
  Image<float> fit3D(const int x, const int y, const int s, Image<float>& dDog,
                     float& dX2, float& dY2, float& dXdY) const;

  // Localization and pruning of a keypoint.
  // This is really the heart of the SIFT algorithm:
  // - 3D interpolation to find the real position of the point in
  //   (x, y, s) space
  // - handling of already-processed positions
  // - feature vector calculation
  // Returns the number of keypoints found and added.
  uint accurateLocalizationAndPruning(const int x, const int y, const int s,
                                      Image<byte>& analyzed,
                                      const ImageSet<float>& gradmag,
                                      const ImageSet<float>& gradori,
                                      std::vector< rutz::shared_ptr<Keypoint> >&
                                      keypoints);

  void calculateOrientationVector(const float x, const float y, const float s,
                                  const ImageSet<float>& gradmag,
                                  const ImageSet<float>& gradorie,
                                  Histogram& OV);

  uint createVectorsAndKeypoints(const float x, const float y, const float s,
                                 const float dogmag,
                                 const ImageSet<float>& gradmag,
                                 const ImageSet<float>& gradorie,
                                 std::vector< rutz::shared_ptr<Keypoint> >& keypoints,
                                 Histogram& OV);

  // Create Keypoint(s) at a verified good location and add them to a
  // VisualObject. Returns the number of Keypoints added.
  uint createKeypoints(const float x, const float y, const float s,
                       const float dogmag, const ImageSet<float>& gradmag,
                       const ImageSet<float>& gradorie,
                       std::vector< rutz::shared_ptr<Keypoint> >& keypoints);
};


#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */