/*!@file SIFT/VisualObjectMatch.H Header for visual object matches */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Laurent Itti <itti@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/VisualObjectMatch.H $
// $Id: VisualObjectMatch.H 9412 2008-03-10 23:10:15Z farhan $
//

#ifndef VISUALOBJECTMATCH_H_DEFINED
#define VISUALOBJECTMATCH_H_DEFINED

#include "Image/Pixels.H"
#include "Image/Image.H"
#include "SIFT/Keypoint.H"
#include "SIFT/KeypointMatch.H"
#include "SIFT/SIFTaffine.H"
#include "SIFT/VisualObjectMatchAlgo.H"
#include <vector>

class KDTree;
class VisualObject;

//! Simple class to store lists of Keypoint matches between two VisualObjects
class VisualObjectMatch {
public:

  //! Constructor from two visual objects
  /*! If using a KDTree matching algo, the KDTree will be built in
      'voref' (with caching). So voref should usually be the object
      that has more keypoints than votest. */
  VisualObjectMatch(const rutz::shared_ptr<VisualObject>& voref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const VisualObjectMatchAlgo algo,
                    const uint thresh = 7U);
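
  /* A minimal usage sketch (illustrative, not part of the original
     API docs), assuming 'voref' and 'votest' are valid VisualObjects
     built elsewhere: construct a match, prune outlier matches, then
     recover the affine transform and a match score.

       VisualObjectMatch match(voref, votest, VOMA_KDTREE);
       match.prune();                          // heuristic outlier pruning
       SIFTaffine aff = match.getSIFTaffine(); // affine from clean matches
       float score = match.getScore();         // higher is better
  */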

  //! Constructor from a pre-built KDTree and a VisualObject
  /*! The KDTree should have been filled already and is considered the
      'reference' object. Matching algo must be either VOMA_KDTREE or
      VOMA_KDTREEBBF. */
  VisualObjectMatch(const rutz::shared_ptr<KDTree>& kdref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const VisualObjectMatchAlgo algo,
                    const uint thresh = 7U);

  //! Build from a precomputed list of KeypointMatch matches
  /*! Normally you would not want to call this unless you got your
      KeypointMatch matches from somewhere else. This is used by
      VisualObjectDB during KDTree-based matching. */
  VisualObjectMatch(const rutz::shared_ptr<VisualObject>& voref,
                    const rutz::shared_ptr<VisualObject>& votest,
                    const std::vector<KeypointMatch>& kpm);

  //! Destructor
  ~VisualObjectMatch();

  //! Add a Keypoint match to the back of our internal list
  inline void push_back(const KeypointMatch& km);

  //! Access a KeypointMatch, read-only version
  inline const KeypointMatch& operator[](const uint idx) const;

  //! Access a KeypointMatch, read/write version
  inline KeypointMatch& operator[](const uint idx);

  //! Get a given KeypointMatch
  inline const KeypointMatch& getKeypointMatch(const uint index) const;

  //! Get the number of Keypoint matches we currently have
  inline uint size() const;

  //! Get the reference VisualObject
  inline const rutz::shared_ptr<VisualObject>& getVoRef() const;

  //! Get the test VisualObject
  inline const rutz::shared_ptr<VisualObject>& getVoTest() const;

  //! Apply a standard series of prunings
  /*! This is a heuristic combination of calls to pruneByDist(),
      pruneByHough(), and pruneByAff(), so as to prune outlier matches
      and allow the recovery of a clean affine transform through
      getSIFTaffine(). Do not prune to fewer than 'minn' matches, but
      try to prune down to fewer than 'maxn' matches (the latter is
      not guaranteed, as all matches may be very good). Returns the
      number of outlier matches pruned away. */
  uint prune(const uint maxn = 25U, const uint minn = 3U);

  //! Prune our matches by ratio of best to second-best distance
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. The given thresh is in units
      of 10%. For example, if thresh=8, matches where the match
      distance is more than 0.8 times the second-best distance will
      be eliminated. Returns the number of matches pruned. Do not
      prune to fewer than 'minn' matches. */
  uint pruneByDist(const uint thresh = 7U, const uint minn = 3U);

  //! Prune the matches using a Hough transform
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. Returns the number of
      matches pruned. Do not prune to fewer than 'minn' matches.
      Prune matches that disagree with the most popular transform by
      more than 'rangefac' times the range in any dimension; sensible
      values are between 0.05 (extremely strict; not recommended
      since the family of geometric transforms used here is very
      approximate) and 0.5 (may not prune anything). */
  uint pruneByHough(const float rangefac = 0.25F, const uint minn = 3U);

  //! Prune our matches by consistency with the affine transform
  /*! Normally you should just use prune(), but this is made public
      for people who want finer control. Compute the affine transform
      from the matches, and eliminate the matches that disagree with
      it, in that the distance in the test image between an
      affine-transformed reference keypoint and the matching test
      keypoint is larger than 'dist'. During this process, we will
      not continue if fewer than 'minn' matches remain. Returns the
      number of outlier matches that were deleted. */
  uint pruneByAff(const float dist = 5.0F, const uint minn = 3U);
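
  /* A sketch of the finer-grained control that the individual
     prune*() calls allow (the exact sequence and thresholds used
     internally by prune() may differ): tighten the distance-ratio
     test, then enforce geometric consistency.

       match.pruneByDist(6U);        // stricter 0.6 distance-ratio test
       match.pruneByHough(0.25F);    // drop Hough-transform outliers
       match.pruneByAff(5.0F);       // drop matches >5 pixels off the affine
  */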

  //! Compute least-squares affine transform between matches
  /*! If we have not already done so, we will compute the affine from
      all our Keypoint matches. Hence it is recommended that you call
      prune() prior to invoking this member function. */
  inline SIFTaffine getSIFTaffine();

  //! Check the SIFT affine for weirdness
  /*! This returns false if our affine represents too gross a
      distortion between the two matched objects.
      @param maxrot max allowed rotation; valid values are in [0..pi];
      default is to allow any rotation.
      @param maxscale max allowed scaling; valid values are > 0.0;
      default is to allow scaling from 1:10 to 10:1.
      @param maxshear max allowed shearing; valid values are >= 0.0;
      default is to allow shearing between -0.5 and 0.5. */
  bool checkSIFTaffine(const float maxrot = 10.0F, const float maxscale = 30.0F,
                       const float maxshear = 10.0F);

  //! Get a match score; higher scores are better
  /*! The score returned here is defined as kcoeff / (1 +
      getKeypointAvgDist()) + acoeff / (1 + getAffineAvgDist()) +
      0.05 * numKeypointMatches. */
  float getScore(const float kcoeff = 0.5F, const float acoeff = 0.5F);

  //! Get a match score based on the salient feature difference
  //! times the spatial distance
  float getSalScore(const float wcoeff = 0.0F, const float hcoeff = 0.0F);

  //! Spatial distance of the salient points
  float getSalDist();

  //! Salient feature vector difference
  float getSalDiff();

  //! Compute a matching score based on average residual squared distance between keys
  /*! Normally you would just use getScore(), but this is made public
      for people who want finer control. The distance here is scaled
      so as to become comparable to that of getAffineAvgDist(). */
  float getKeypointAvgDist();

  //! Compute a matching score based on average residual distance between keys
  /*! Normally you would just use getScore(), but this is made public
      for people who want finer control. You should call this after
      you have pruned. We here just get the SIFTaffine and compute
      the average residual distance between each ref keypoint
      transformed by the affine and the corresponding test keypoint.
      Units hence are pixels in the test image. */
  float getAffineAvgDist();

  //! Get a combo image with SIFT Keypoint matches
  Image< PixRGB<byte> > getMatchImage(const float scale = 1.0F) const;

  //! Get a combo image with SIFT Keypoint matches
  /*! This version draws the combo inside a frame of the given dims,
      so that images of different sizes can be reconciled, and
      offsets can be applied to the ref and test images. */
  Image< PixRGB<byte> > getMatchImage(Dims frameSize,
                                      Point2D<int> refOffset, Point2D<int> testOffset,
                                      const float scale = 1.0F) const;
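
  /* A visualization sketch (hypothetical usage, not from the
     original docs): after pruning, render the side-by-side match
     image and a 50/50 fusion of the test object warped onto the
     reference object.

       Image< PixRGB<byte> > combo = match.getMatchImage();
       Image< PixRGB<byte> > fused = match.getFusedImage(0.5F);
  */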

  //! Get the image of the test object transformed to match the ref object
  /*! If an uninitialized image is given, the resulting image has the
      size of the ref object's, and contains zeros everywhere except
      where the test object is. Otherwise, the test object is just
      painted into the given image (which must have the dims of the
      ref image). */
  Image< PixRGB<byte> > getTransfTestImage(const Image< PixRGB<byte> >&
                                           im = Image< PixRGB<byte> >());

  //! Get the transformed coords of the 4 corners of the test image
  /*! The returned points have coords in the coord system of the ref
      image, but may fall outside that image. */
  void getTransfTestOutline(Point2D<int>& tl, Point2D<int>& tr,
                            Point2D<int>& br, Point2D<int>& bl);

  //! Get the image of the test object fused on top of that of the ref obj
  /*! The resulting image has the size of the ref object's. A mixing
      factor of 1.0 means that the ref image gets a coeff of 1.0 and
      the test image a coeff of 0.0. To get the affine transform
      between the two images, getSIFTaffine() will be called, without
      any pruning. So you may want to prune and clean the matches
      before you get that final display. */
  Image< PixRGB<byte> > getFusedImage(const float mix = 0.5F);

  //! Get our list of Keypoint matches
  inline const std::vector<KeypointMatch>& getKeypointMatches() const;

  //! Get the spatial distance between the two objects
  /*! The offsets passed in are the coordinates of the top-left
      corner of each image. The result is the distance between the
      two respective origins, which is also the coordinate change
      between the two objects. This is useful for visual objects that
      come from a cropped frame; the result can be interpreted as
      camera movement or egomotion. */
  Point2D<int> getSpatialDist(Point2D<int> objOffset1 = Point2D<int>(0,0),
                              Point2D<int> objOffset2 = Point2D<int>(0,0));

  //! Get the overlap rectangle of the match
  Rectangle getOverlapRect();

  //! Check overlap using the image rectangle overlap
  bool isOverlapping();

  //! Check overlap using keypoint bounding box overlap
  bool isOverlapping2();
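
  /* A sketch of egomotion estimation from two cropped frames (the
     numeric offsets here are hypothetical): pass the top-left corner
     of each crop in its original frame; the returned vector then
     approximates the camera motion between the two frames.

       Point2D<int> motion =
         match.getSpatialDist(Point2D<int>(40, 20),   // ref crop origin
                              Point2D<int>(55, 25));  // test crop origin
  */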

private:
  VisualObjectMatch(const VisualObjectMatch& m);            //!< forbid copy-construction
  VisualObjectMatch& operator=(const VisualObjectMatch& m); //!< forbid copy

  //! our first (reference) object
  rutz::shared_ptr<VisualObject> itsVoRef;

  //! our second (test) object
  rutz::shared_ptr<VisualObject> itsVoTest;

  //! matches between our objects
  std::vector<KeypointMatch> itsMatches;

  //! KDTree for our reference object
  rutz::shared_ptr<KDTree> itsKDTree;

  //! did we already compute an affine?
  bool itsHasAff;

  //! our current affine transform
  SIFTaffine itsAff;

  //! did we already compute the keypoint average distance?
  bool itsHasKpAvgDist;

  //! current keypoint average distance
  float itsKpAvgDist;

  //! did we already compute the affine average distance?
  bool itsHasAfAvgDist;

  //! current affine average distance
  float itsAfAvgDist;

  // Find matching Keypoints using a simple brute-force algo. Returns
  // the number of Keypoint matches.
  uint matchSimple(const uint thresh);

  // Find matching Keypoints using a KDTree to accelerate the search
  // for matches. The KDTree will be built in the itsVoRef object.
  // Hence, the ref object should usually be the one with many more
  // keypoints than the test object. Note that KDTrees are cached, so
  // the KDTree will actually be built only the first time
  // matchKDTree() is called. Returns the number of Keypoint matches.
  // If bbf is 0, then normal (very slow) KDTree matching is done;
  // otherwise, BBF matching is done with up to bbf steps.
  uint matchKDTree(const uint thresh, const int bbf);

  // Compute our affine transform based on all our matches. This
  // should be called only once outliers have been removed.
  void computeAffine();

  // Get keypoint differences; used internally by pruneByHough()
  inline void getKdiff(const KeypointMatch& km, float& dx,
                       float& dy, float& doo, float& ds) const;
};

// ######################################################################
// ########## INLINED FUNCTIONS
// ######################################################################
inline uint VisualObjectMatch::size() const
{ return itsMatches.size(); }

inline void VisualObjectMatch::push_back(const KeypointMatch& km)
{ itsMatches.push_back(km); }

inline const KeypointMatch&
VisualObjectMatch::operator[](const uint idx) const
{ return itsMatches[idx]; }

inline KeypointMatch&
VisualObjectMatch::operator[](const uint idx)
{ return itsMatches[idx]; }

inline const KeypointMatch&
VisualObjectMatch::getKeypointMatch(const uint index) const
{
  ASSERT(index < itsMatches.size());
  return itsMatches[index];
}

inline const rutz::shared_ptr<VisualObject>&
VisualObjectMatch::getVoRef() const
{ return itsVoRef; }

inline const rutz::shared_ptr<VisualObject>&
VisualObjectMatch::getVoTest() const
{ return itsVoTest; }

inline SIFTaffine VisualObjectMatch::getSIFTaffine()
{
  if (itsHasAff == false) computeAffine();
  return itsAff;
}

inline void VisualObjectMatch::getKdiff
  (const KeypointMatch& km, float& dx,
   float& dy, float& doo, float& ds) const
{
  rutz::shared_ptr<Keypoint> refkp = km.refkp;
  rutz::shared_ptr<Keypoint> tstkp = km.tstkp;

  // coordinate, scale, and orientation differences, test minus ref:
  dx = tstkp->getX() - refkp->getX();
  dy = tstkp->getY() - refkp->getY();
  ds = tstkp->getS() - refkp->getS();
  doo = tstkp->getO() - refkp->getO();

  // wrap the orientation difference back into [-pi .. pi]:
  while (doo < -M_PI) doo += 2.0F * M_PI;
  while (doo > M_PI) doo -= 2.0F * M_PI;
}

inline const std::vector<KeypointMatch>&
VisualObjectMatch::getKeypointMatches() const
{ return itsMatches; }

#endif

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */