/*!@file SIFT/VisualObjectMatch.H Header for visual object matches */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Laurent Itti <itti@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/VisualObjectMatch.H $
// $Id: VisualObjectMatch.H 9412 2008-03-10 23:10:15Z farhan $
//

#ifndef VISUALOBJECTMATCH_H_DEFINED
#define VISUALOBJECTMATCH_H_DEFINED

#include "Image/Pixels.H"
#include "Image/Image.H"
#include "SIFT/Keypoint.H"
#include "SIFT/KeypointMatch.H"
#include "SIFT/SIFTaffine.H"
#include "SIFT/VisualObjectMatchAlgo.H"
#include <vector>

class KDTree;
class VisualObject;

//! Simple class to store lists of Keypoint matches between two VisualObjects
class VisualObjectMatch {
public:

  //! Constructor from two visual objects
  /*! If using a KDTree matching algo, the KDTree will be built in
      'voref' (with caching). So voref should usually be the object
      that has more keypoints than votest. */
  VisualObjectMatch(const rutz::shared_ptr<VisualObject>& voref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const VisualObjectMatchAlgo algo,
                    const uint thresh = 7U);
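
  /* A minimal usage sketch (illustrative, not part of the original
     API docs), assuming 'voref' and 'votest' are valid VisualObjects
     built elsewhere: construct a match, prune outlier matches, then
     recover the affine transform and a match score.

       VisualObjectMatch match(voref, votest, VOMA_KDTREE);
       match.prune();                          // heuristic outlier pruning
       SIFTaffine aff = match.getSIFTaffine(); // affine from clean matches
       float score = match.getScore();         // higher is better
  */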

  //! Constructor from a pre-built KDTree and a VisualObject
  /*! The KDTree should have been filled already and is considered the
      'reference' object. Matching algo must be either VOMA_KDTREE or
      VOMA_KDTREEBBF. */
  VisualObjectMatch(const rutz::shared_ptr<KDTree>& kdref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const VisualObjectMatchAlgo algo,
                    const uint thresh = 7U);

  //! Build from a precomputed list of KeypointMatch matches
  /*! Normally you would not want to call this unless you got your
      KeypointMatch matches from somewhere else. This is used by
      VisualObjectDB during KDTree-based matching. */
  VisualObjectMatch(const rutz::shared_ptr<VisualObject>& voref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const std::vector<KeypointMatch>& kpm);

  //! Destructor
  ~VisualObjectMatch();

  //! Add a Keypoint match to the back of our internal list
  inline void push_back(const KeypointMatch& km);

  //! Access a KeypointMatch, read-only version
  inline const KeypointMatch& operator[](const uint idx) const;

  //! Access a KeypointMatch, read/write version
  inline KeypointMatch& operator[](const uint idx);

  //! Get a given KeypointMatch
  inline const KeypointMatch& getKeypointMatch(const uint index) const;

  //! Get the number of Keypoint matches we currently have
  inline uint size() const;

  //! Get the reference VisualObject
  inline const rutz::shared_ptr<VisualObject>& getVoRef() const;

  //! Get the test VisualObject
  inline const rutz::shared_ptr<VisualObject>& getVoTest() const;

  //! Apply a standard series of prunings
  /*! This is a heuristic combination of calls to pruneByDist(),
      pruneByHough(), and pruneByAff(), so as to prune outlier matches
      and allow the recovery of a clean affine transform through
      getSIFTaffine(). Do not prune to fewer than 'minn' matches, but
      try to prune down to fewer than 'maxn' matches (the latter is
      not guaranteed, as all matches may be very good). Returns the
      number of outlier matches pruned away. */
  uint prune(const uint maxn = 25U, const uint minn = 3U);

  //! Prune our matches by ratio of best to second-best distance
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. The given thresh is in units
      of 10%. For example, if thresh=8, matches where the match
      distance is more than 0.8 times the second-best distance will
      be eliminated. Returns the number of matches pruned. Do not
      prune to fewer than 'minn' matches. */
  uint pruneByDist(const uint thresh = 7U, const uint minn = 3U);

  //! Prune the matches using a Hough transform
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. Returns the number of
      matches pruned. Do not prune to fewer than 'minn' matches.
      Prune matches that disagree with the most popular transform by
      more than 'rangefac' times the range in any dimension; sensible
      values are between 0.05 (extremely strict; not recommended
      since the family of geometric transforms used here is very
      approximate) and 0.5 (may not prune anything). */
  uint pruneByHough(const float rangefac = 0.25F, const uint minn = 3U);

  //! Prune our matches by consistency with the affine transform
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. Compute the affine transform
      from the matches, and eliminate the matches that disagree with
      it, in that the distance in the test image between an
      affine-transformed reference keypoint and the matching test
      keypoint is larger than 'dist'. During this process, we will
      not continue if fewer than 'minn' matches remain. Returns the
      number of outlier matches that were deleted. */
  uint pruneByAff(const float dist = 5.0F, const uint minn = 3U);
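
  /* A sketch of the finer-grained control that the individual
     prune*() calls allow (the exact sequence and thresholds used
     internally by prune() may differ): tighten the distance-ratio
     test, then enforce geometric consistency.

       match.pruneByDist(6U);        // stricter 0.6 distance-ratio test
       match.pruneByHough(0.25F);    // drop Hough-transform outliers
       match.pruneByAff(5.0F);       // drop matches >5 pixels off the affine
  */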

  //! Compute least-squares affine transform between matches
  /*! If we have not already done so, we will compute the affine from
      all our Keypoint matches. Hence it is recommended that you call
      prune() prior to invoking this member function. */
  inline SIFTaffine getSIFTaffine();

  //! Check the SIFT affine for weirdness
  /*! This returns false if our affine represents too gross a
      distortion between the two matched objects.
      @param maxrot max allowed rotation; valid values are in [0..pi];
      default is to allow any rotation.
      @param maxscale max allowed scaling; valid values are > 0.0;
      default is to allow scaling from 1:10 to 10:1.
      @param maxshear max allowed shearing; valid values are >= 0.0;
      default is to allow shearing between -0.5 and 0.5. */
  bool checkSIFTaffine(const float maxrot = 10.0F, const float maxscale = 30.0F,
                       const float maxshear = 10.0F);

  //! Get a match score; higher scores are better
  /*! The score returned here is defined as kcoeff / (1 +
      getKeypointAvgDist()) + acoeff / (1 + getAffineAvgDist()) +
      0.05 * numKeypointMatches. */
  float getScore(const float kcoeff = 0.5F, const float acoeff = 0.5F);

  //! Get a match score based on the salient feature difference
  //! times the spatial distance
  float getSalScore(const float wcoeff = 0.0F, const float hcoeff = 0.0F);

  //! Spatial distance of the salient points
  float getSalDist();

  //! Salient feature vector difference
  float getSalDiff();

  //! Compute a matching score based on average residual squared distance between keys
  /*! Normally you would just use getScore(), but this is made public
      for people who want finer control. The distance here is scaled
      so as to become comparable to that of getAffineAvgDist(). */
  float getKeypointAvgDist();

  //! Compute a matching score based on average residual distance between keys
  /*! Normally you would just use getScore(), but this is made public
      for people who want finer control. You should call this after
      you have pruned. We here just get the SIFTaffine and compute
      the average residual distance between each ref keypoint
      transformed by the affine and the corresponding test keypoint.
      Units hence are pixels in the test image. */
  float getAffineAvgDist();

  //! Get a combo image with SIFT Keypoint matches
  Image< PixRGB<byte> > getMatchImage(const float scale = 1.0F) const;

  //! Get a combo image with SIFT Keypoint matches
  /*! This version draws the combo inside a frame of the given dims,
      so that images of different sizes can be reconciled, and
      offsets can be applied to the ref and test images. */
  Image< PixRGB<byte> > getMatchImage(Dims frameSize,
                                      Point2D<int> refOffset, Point2D<int> testOffset,
                                      const float scale = 1.0F) const;
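
  /* A visualization sketch (hypothetical usage, not from the
     original docs): after pruning, render the side-by-side match
     image and a 50/50 fusion of the test object warped onto the
     reference object.

       Image< PixRGB<byte> > combo = match.getMatchImage();
       Image< PixRGB<byte> > fused = match.getFusedImage(0.5F);
  */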

  //! Get the image of the test object transformed to match the ref object
  /*! If an uninitialized image is given, the resulting image has the
      size of the ref object's, and contains zeros everywhere except
      where the test object is. Otherwise, the test object is just
      painted into the given image (which must have the dims of the
      ref image). */
  Image< PixRGB<byte> > getTransfTestImage(const Image< PixRGB<byte> >&
                                           im = Image< PixRGB<byte> >());

  //! Get the transformed coords of the 4 corners of the test image
  /*! The returned points have coords in the coord system of the ref
      image, but may fall outside that image. */
  void getTransfTestOutline(Point2D<int>& tl, Point2D<int>& tr,
                            Point2D<int>& br, Point2D<int>& bl);

  //! Get the image of the test object fused on top of that of the ref obj
  /*! The resulting image has the size of the ref object's. A mixing
      factor of 1.0 means that the ref image gets a coeff of 1.0 and
      the test image a coeff of 0.0. To get the affine transform
      between the two images, getSIFTaffine() will be called, without
      any pruning. So you may want to prune and clean the matches
      before you get that final display. */
  Image< PixRGB<byte> > getFusedImage(const float mix = 0.5F);

  //! Get our list of Keypoint matches
  inline const std::vector<KeypointMatch>& getKeypointMatches() const;

  //! Get the spatial distance between the two objects
  /*! The offsets passed in are the coordinates of the top-left
      corner of each image. The result is the distance between the
      two respective origins, which is also the coordinate change
      between the two objects. This is useful for visual objects that
      come from a cropped frame; the result can be interpreted as
      camera movement or egomotion. */
  Point2D<int> getSpatialDist(Point2D<int> objOffset1 = Point2D<int>(0,0),
                              Point2D<int> objOffset2 = Point2D<int>(0,0));

  //! Get the overlap rectangle of the match
  Rectangle getOverlapRect();

  //! Check overlap using the image rectangle overlap
  bool isOverlapping();

  //! Check overlap using keypoint bounding box overlap
  bool isOverlapping2();
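
  /* A sketch of egomotion estimation from two cropped frames (the
     numeric offsets here are hypothetical): pass the top-left corner
     of each crop in its original frame; the returned vector then
     approximates the camera motion between the two frames.

       Point2D<int> motion =
         match.getSpatialDist(Point2D<int>(40, 20),   // ref crop origin
                              Point2D<int>(55, 25));  // test crop origin
  */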

private:
  VisualObjectMatch(const VisualObjectMatch& m);            //!< forbid copy-construction
  VisualObjectMatch& operator=(const VisualObjectMatch& m); //!< forbid copy

  //! our first (reference) object
  rutz::shared_ptr<VisualObject> itsVoRef;

  //! our second (test) object
  rutz::shared_ptr<VisualObject> itsVoTest;

  //! matches between our objects
  std::vector<KeypointMatch> itsMatches;

  //! KDTree for our reference object
  rutz::shared_ptr<KDTree> itsKDTree;

  //! did we already compute an affine?
  bool itsHasAff;

  //! our current affine transform
  SIFTaffine itsAff;

  //! did we already compute the keypoint average distance?
  bool itsHasKpAvgDist;

  //! current keypoint average distance
  float itsKpAvgDist;

  //! did we already compute the affine average distance?
  bool itsHasAfAvgDist;

  //! current affine average distance
  float itsAfAvgDist;

  // Find matching Keypoints using a simple brute-force algo. Returns
  // the number of Keypoint matches.
  uint matchSimple(const uint thresh);

  // Find matching Keypoints using a KDTree to accelerate the search
  // for matches. The KDTree will be built in the itsVoRef object.
  // Hence, the ref object should usually be the one with many more
  // keypoints than the test object. Note that KDTrees are cached, so
  // the KDTree will actually be built only the first time
  // matchKDTree() is called. Returns the number of Keypoint matches.
  // If bbf is 0, then normal (very slow) KDTree matching is done;
  // otherwise, BBF matching is done with up to bbf steps.
  uint matchKDTree(const uint thresh, const int bbf);

  // Compute our affine transform based on all our matches. This
  // should be called only once outliers have been removed.
  void computeAffine();

  // Get keypoint differences; used internally by pruneByHough()
  inline void getKdiff(const KeypointMatch& km, float& dx,
                       float& dy, float& doo, float& ds) const;
};

// ######################################################################
// ########## INLINED FUNCTIONS
// ######################################################################
inline uint VisualObjectMatch::size() const
{ return itsMatches.size(); }

inline void VisualObjectMatch::push_back(const KeypointMatch& km)
{ itsMatches.push_back(km); }

inline const KeypointMatch&
VisualObjectMatch::operator[](const uint idx) const
{ return itsMatches[idx]; }

inline KeypointMatch&
VisualObjectMatch::operator[](const uint idx)
{ return itsMatches[idx]; }

inline const KeypointMatch&
VisualObjectMatch::getKeypointMatch(const uint index) const
{
  ASSERT(index < itsMatches.size());
  return itsMatches[index];
}

inline const rutz::shared_ptr<VisualObject>&
VisualObjectMatch::getVoRef() const
{ return itsVoRef; }

inline const rutz::shared_ptr<VisualObject>&
VisualObjectMatch::getVoTest() const
{ return itsVoTest; }

inline SIFTaffine VisualObjectMatch::getSIFTaffine()
{
  if (itsHasAff == false) computeAffine();
  return itsAff;
}

inline void VisualObjectMatch::getKdiff
  (const KeypointMatch& km, float& dx,
   float& dy, float& doo, float& ds) const
{
  rutz::shared_ptr<Keypoint> refkp = km.refkp;
  rutz::shared_ptr<Keypoint> tstkp = km.tstkp;

  // coordinate, scale, and orientation differences, test minus ref:
  dx = tstkp->getX() - refkp->getX();
  dy = tstkp->getY() - refkp->getY();
  ds = tstkp->getS() - refkp->getS();
  doo = tstkp->getO() - refkp->getO();

  // wrap the orientation difference back into [-pi .. pi]:
  while (doo < -M_PI) doo += 2.0F * M_PI;
  while (doo > M_PI) doo -= 2.0F * M_PI;
}

inline const std::vector<KeypointMatch>&
VisualObjectMatch::getKeypointMatches() const
{ return itsMatches; }

#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */