/*!@file SIFT/ScaleSpace.H ScaleSpace computation for SIFT obj recognition */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: James Bonaiuto <bonaiuto@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/ScaleSpace.H $
// $Id: ScaleSpace.H 10423 2008-11-12 22:26:07Z mviswana $
//

#ifndef SCALESPACE_H_DEFINED
#define SCALESPACE_H_DEFINED

#include "Image/Image.H"
#include "Image/ImageSet.H"
#include "SIFT/Histogram.H"
#include "rutz/shared_ptr.h"

#include <vector>

class Keypoint;


//! ScaleSpace is like a Gaussian pyramid but with several images per level
/*! ScaleSpace is a variation of a Gaussian pyramid, where we have
    several images convolved by increasingly large Gaussian filters for
    each level (pixel size). So, while in the standard dyadic Gaussian
    pyramid we convolve an image at a given resolution, then decimate it
    by a factor of 2 to yield the next lower resolution, here we convolve
    at each resolution several times with increasingly large Gaussians
    and decimate only once every several images. This ScaleSpace class
    represents the several convolved images that are obtained for one
    resolution (one so-called octave). Additionally, ScaleSpace provides
    machinery for SIFT keypoint extraction. In the SIFT algorithm we use
    several ScaleSpace objects for several image resolutions.
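    For concreteness, with the default s = 3 scales per octave, each
    successive image gains blur so that its total effective sigma grows
    by a factor k = 2^(1/3) ~= 1.26 from one image to the next; after
    the s-th image the effective blur has exactly doubled (k^s = 2),
    which is what ends the octave and triggers the decimation to the
    next ScaleSpace.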

    Some of the code here is based on the Hugin panorama software - see
    http://hugin.sourceforge.net - but substantial modifications have
    been made, especially by carefully going over David Lowe's IJCV 2004
    paper. Some of the code also comes from http://autopano.kolor.com/
    but substantial debugging has also been done. */
class ScaleSpace
{
public:
  // Channel definitions
  enum ChannlesDef { LUM_CHANNEL = 0, RG_CHANNEL = 1, BY_CHANNEL = 2 };

  //! Constructor
  /*! Take an input image and create a bunch of Gaussian-convolved
      versions of it as well as a bunch of DoG versions of it. We
      obtain the following images, given 's' images per octave and a
      baseline Gaussian sigma of 'sigma'. In all that follows, [*]
      represents the convolution operator.

      We want that, once we have convolved the image 's' times, we end
      up with an image blurred by 2 * sigma. It is well known that
      convolving several times by Gaussians is equivalent to convolving
      once by a single Gaussian whose variance (sigma^2) is the sum of
      all the variances of the other Gaussians. It is easy to show that
      we then want to convolve each image (n >= 1) by a Gaussian of
      stdev std(n) = sigma(n-1) * sqrt(k*k - 1.0), where k = 2^(1/s) and
      sigma(n) = sigma * k^n, which means that each time we multiply the
      total effective sigma by k. So, in summary, for every n >= 1,
      std(n) = sigma * k^(n-1) * sqrt(k*k - 1.0).

      For our internal Gaussian imageset, we get s+3 images; the reason
      for that is that we want to have all the possible DoGs for that
      octave:

      blur[ 0]  = in        [*] G(sigma)
      blur[ 1]  = blur[0]   [*] G(std(1))   = in [*] G(k*sigma)
      blur[ 2]  = blur[1]   [*] G(std(2))   = in [*] G(k^2*sigma)
      ...
      blur[ s]  = blur[s-1] [*] G(std(s))   = in [*] G(k^s*sigma) = in [*] G(2*sigma)
      blur[s+1] = blur[s]   [*] G(std(s+1)) = in [*] G(2*k*sigma)
      blur[s+2] = blur[s+1] [*] G(std(s+2)) = in [*] G(2*k^2*sigma)

      For our internal DoG imageset, we just take differences between
      two adjacent images in the blurred imageset, yielding s+2 images
      such that:

      dog[ 0]  = in [*] (G(k*sigma)     - G(sigma))
      dog[ 1]  = in [*] (G(k^2*sigma)   - G(k*sigma))
      dog[ 2]  = in [*] (G(k^3*sigma)   - G(k^2*sigma))
      ...
      dog[s-1] = in [*] (G(2*sigma)     - G(k^(s-1)*sigma))
      dog[ s]  = in [*] (G(2*k*sigma)   - G(2*sigma))
      dog[s+1] = in [*] (G(2*k^2*sigma) - G(2*k*sigma))

      The reason why we need to compute dog[s+1] is that to find
      keypoints we will look for extrema in the scalespace, which
      requires comparing the dog value at a given level (1 .. s) to the
      dog values at levels immediately above and immediately below.

      To chain ScaleSpaces, you should construct a first one from your
      original image, after you have ensured that it has a blur of
      'sigma'. Then do a getTwoSigmaImage(), decimate the result using
      decXY(), and construct your next ScaleSpace directly from
      that. Note that when you chain ScaleSpace objects, the effective
      blurring accelerates: the first ScaleSpace goes from sigma to
      2.0*sigma, the second from 2.0*sigma to 4.0*sigma, and so on.
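      For illustration, a minimal chaining sketch along those lines,
      assuming a single (luminance) channel and assuming that decXY()
      (the decimation helper mentioned above) decimates by a factor of
      2 by default; exact signatures may differ:

      @code
      ImageSet<float> in1(1);
      in1[0] = img;                // 'img' assumed already blurred to sigma
      ScaleSpace ss1(in1, 1.0F);   // first octave, octscale = 1
      Image<float> two = ss1.getTwoSigmaImage(ScaleSpace::LUM_CHANNEL);
      ImageSet<float> in2(1);
      in2[0] = decXY(two);         // halved size; blur is back to sigma
      ScaleSpace ss2(in2, 2.0F);   // next octave, octscale doubles
      @endcode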
      @param in the input image.
      @param octscale baseline octave scale for this scalespace compared
      to the size of our original image. For example, if 'in' has been
      decimated by a factor of 4 horizontally and vertically relative to
      the original input image, octscale should be 4.0. This is used
      later on when assembling keypoint descriptors, so that we remember
      to scale the coordinates of the keypoints from our coordinates
      (computed in a possibly reduced input image) back to the
      coordinates of the original input image.
      @param s number of Gaussian scales per octave.
      @param sigma baseline Gaussian sigma to use. */
  ScaleSpace(const ImageSet<float>& in, const float octscale,
             const int s = 3, const float sigma = 1.6F, bool useColor = false);

  //! Destructor
  ~ScaleSpace();

  //! Get the image blurred by 2*sigma
  /*! This image can then be downscaled and fed as input to the next
      ScaleSpace. */
  Image<float> getTwoSigmaImage(int channel) const;

  //! Find the keypoints in the previously provided picture
  /*! The ScaleSpace will be searched for good SIFT keypoints. Any
      keypoint found will be pushed to the back of the provided
      vector. Note that the vector is not cleared, so if you pass a
      non-empty vector, new keypoints will just be added to it. Returns
      the number of keypoints added to the vector of keypoints. */
  uint findKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

  //! Get the keypoints in the previously provided picture
  /*! The ScaleSpace will extract keypoints in a grid setup. Those
      keypoints will be pushed to the back of the provided
      vector. Note that the vector is not cleared, so if you pass a
      non-empty vector, new keypoints will just be added to it. Returns
      the number of keypoints added to the vector of keypoints. */
  uint getGridKeypoints(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

  //! Get number of blurred images
  uint getNumBlurredImages() const;

  //! Get a blurred image
  Image<float> getBlurredImage(const uint idx) const;

  //! Get number of DoG images
  uint getNumDoGImages() const;

  //! Get a DoG image
  Image<float> getDoGImage(const uint idx) const;

  //! Get a keypoint image for saliency submap
  Image<float> getKeypointImage(std::vector< rutz::shared_ptr<Keypoint> >& keypoints);

private:
  float itsOctScale;          // scale factor relative to original image
  float itsSigma;             // our baseline Gaussian sigma
  ImageSet<float> itsLumBlur; // Gaussian-blurred pictures for lum
  ImageSet<float> itsRGBlur;  // Gaussian-blurred pictures for rg
  ImageSet<float> itsBYBlur;  // Gaussian-blurred pictures for by
  ImageSet<float> itsDog;     // difference-of-Gaussian pictures
  bool itsUseColor;           // should we add color?
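  // ######################################################################
  // Note on the localization helpers below: the sub-pixel refinement
  // (fit3D() and accurateLocalizationAndPruning()) follows section 4 of
  // Lowe's IJCV 2004 paper. Around a discrete DoG extremum at (x, y, s),
  // fit a 3D quadratic and solve H * offset = -g, where g and H are the
  // gradient and Hessian of the DoG estimated by finite differences. An
  // illustrative sketch of the central differences involved, with
  // im0/im1/im2 as the three adjacent DoG planes (the members below may
  // differ in details):
  //
  //   float dX  = 0.5F * (im1.getVal(x+1, y) - im1.getVal(x-1, y)); // d/dx
  //   float dY  = 0.5F * (im1.getVal(x, y+1) - im1.getVal(x, y-1)); // d/dy
  //   float dS  = 0.5F * (im2.getVal(x, y)   - im0.getVal(x, y));   // d/ds
  //   float dX2 = im1.getVal(x+1, y) - 2.0F * im1.getVal(x, y)
  //             + im1.getVal(x-1, y);                               // d2/dx2
  //
  // If any component of the solved offset exceeds 0.5, the true extremum
  // lies closer to a neighboring sample; low-contrast points and edge-like
  // points (large ratio of principal curvatures) are then pruned.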

  // Returns true if the value at (x, y) in im1 is a max or min within
  // its 3x3x3 neighborhood spanning im0, im1 and im2
  bool checkForMinMax(const int x, const int y, const Image<float>& im0,
                      const Image<float>& im1, const Image<float>& im2) const;

  // Fits a paraboloid in (X, Y, S) space: 3D Hessian matrix inversion
  Image<float> fit3D(const int x, const int y, const int s, Image<float>& dDog,
                     float& dX2, float& dY2, float& dXdY) const;

  // Localization and pruning of a keypoint.
  // This is really the heart of the SIFT algorithm:
  // - 3D interpolation to find the real position of the point in
  //   (x, y, s) space
  // - handling of already-processed positions
  // - feature vector calculation
  // Returns the number of keypoints found and added.
  uint accurateLocalizationAndPruning(const int x, const int y, const int s,
                                      Image<byte>& analyzed,
                                      const ImageSet<float>& gradmag,
                                      const ImageSet<float>& gradori,
                                      std::vector< rutz::shared_ptr<Keypoint> >&
                                      keypoints);

  void calculateOrientationVector(const float x, const float y, const float s,
                                  const ImageSet<float>& gradmag,
                                  const ImageSet<float>& gradorie,
                                  Histogram& OV);

  uint createVectorsAndKeypoints(const float x, const float y, const float s,
                                 const float dogmag,
                                 const ImageSet<float>& gradmag,
                                 const ImageSet<float>& gradorie,
                                 std::vector< rutz::shared_ptr<Keypoint> >& keypoints,
                                 Histogram& OV);

  // Create Keypoint(s) at a verified good location and add them to a
  // VisualObject. Returns the number of Keypoints added.
  uint createKeypoints(const float x, const float y, const float s,
                       const float dogmag, const ImageSet<float>& gradmag,
                       const ImageSet<float>& gradorie,
                       std::vector< rutz::shared_ptr<Keypoint> >& keypoints);
};


#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */