00001 /*!@file SIFT/VisualObject.C Visual Objects to be recognized */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/VisualObject.C $ 00035 // $Id: VisualObject.C 14182 2010-10-29 01:44:29Z rand $ 00036 // 00037 00038 #include "SIFT/VisualObject.H" 00039 #include "SIFT/ScaleSpace.H" 00040 #include "Image/ColorOps.H" 00041 #include "Image/DrawOps.H" 00042 #include "Image/ShapeOps.H" 00043 #include "Image/Kernels.H" 00044 #include "Image/FilterOps.H" 00045 #include "Image/MathOps.H" 00046 #include "Image/Pixels.H" 00047 #include "Raster/Raster.H" 00048 00049 #include <algorithm> 00050 #include <cmath> 00051 #include <istream> 00052 #include <ostream> 00053 00054 #include <cctype> 00055 00056 namespace 00057 { 00058 bool isInteger(const std::string& s) 00059 { 00060 if (s.length() == 0) return false; 00061 00062 if (s[0] != '-' && !isdigit(s[0])) return false; 00063 00064 for (size_t i = 1; i < s.length(); ++i) 00065 if (!isdigit(s[i])) return false; 00066 00067 return true; 00068 } 00069 } 00070 // ###################################################################### 00071 // functor to assist with keypoint sorting: 00072 class lessKP 00073 { 00074 public: 00075 bool operator()(const rutz::shared_ptr<Keypoint>& x, 00076 const rutz::shared_ptr<Keypoint>& y) 00077 { return (*x) < (*y); } 00078 }; 00079 00080 // here is an implementation of is_sorted() (which turns out to be a 00081 // non-standard SGI extension to the STL and hence is not always 00082 // available), ripped from 00083 // http://lists.boost.org/MailArchives/boost/msg40406.php 00084 template <class ForwardIterator, class StrictWeakOrdering> 00085 bool myIsSorted(ForwardIterator begin, ForwardIterator end, 00086 StrictWeakOrdering comp) 00087 { 00088 if (begin == end) return true; 00089 00090 ForwardIterator next = begin; 00091 ++next; 00092 for (; next != end ; ++begin,++next) if (comp(*next, *begin)) return false; 00093 00094 return true; 00095 } 00096 00097 // ###################################################################### 00098 VisualObject::VisualObject(const std::string& name, 00099 const std::string& imagefname, 00100 const Image< PixRGB<byte> >& image, 00101 const Point2D<int>& salpt, 00102 const std::vector<float>& preattfeatures, 00103 const std::vector< rutz::shared_ptr<Keypoint> >& 00104 keypoints, 00105 const bool useColor, 00106 bool computeKP) : 00107 00108 itsName(name), itsImageFname(imagefname), itsImage(image), 00109 itsKeypoints(keypoints), itsSalPoint(salpt), itsFeatures(preattfeatures), 00110 itsIsSorted(false), itsUseColor(useColor),itsImageLoaded(true) 00111 { 00112 itsObjectSize = image.getDims(); 00113 if(computeKP) computeKeypoints(); 00114 } 00115 00116 // ###################################################################### 00117 void VisualObject::computeKeypoints() 00118 { 00119 // if we were given an image but no keypoints, let's extract them now: 00120 if (itsImage.initialized() && itsKeypoints.empty()) 00121 { 00122 LDEBUG("%s: initializing ScaleSpace from %dx%d image...", 00123 itsName.c_str(), itsImage.getWidth(), itsImage.getHeight()); 00124 00125 // compute the luminance of the image: 00126 Image<float> lum = luminance(itsImage); 00127 00128 // compute the opponent color space 00129 // and double the image 00130 Image<float> rg, by; 00131 if (itsUseColor){ 00132 getRGBY(itsImage, rg, by, 25.0F); 00133 rg = interpolate(rg); 00134 by = interpolate(by); 00135 } 00136 00137 // double the resolution: 00138 lum = interpolate(lum); 00139 00140 const int nums = 3; // recommended by David Lowe 00141 const double sigma = 1.6F; // recommended by David Lowe 00142 float octscale = 0.5F; // since we doubled the image 00143 00144 // To feed the first ScaleSpace in our series, apply some 00145 // initial blur so that the input image has an effective blur of 00146 // the desired sigma. We assume that the original image has a 00147 // blur of at least 0.5 by construction. Since its size has been 00148 // doubled (octscale=0.5), then that becomes 1.0. We assume that 00149 // the sigma=1.6 applies to the doubled image. Remember that the 00150 // variances add when we sequentially convolve by 00151 // Gaussians. Hence the additional blur we need is such that 00152 // sigma^2 = 1^2 + blursig^2: 00153 const float blursig = sqrtf(sigma * sigma - 1.0F); 00154 Image<float> kernel = gaussian<float>(1.0F, blursig, 00155 lum.getWidth(), 1.0F); 00156 kernel = kernel / float(sum(kernel)); 00157 lum = sepFilter(lum, kernel, kernel, CONV_BOUNDARY_CLEAN); 00158 00159 if (itsUseColor){ 00160 // scale the color space 00161 rg = sepFilter(rg, kernel, kernel, CONV_BOUNDARY_CLEAN); 00162 by = sepFilter(by, kernel, kernel, CONV_BOUNDARY_CLEAN); 00163 } 00164 00165 // let's do it: 00166 int iter = 0; uint numkp = 0; 00167 while (lum.getWidth() > 24 && lum.getHeight() > 24) 00168 { 00169 ImageSet<float> inImg(3); 00170 inImg[ScaleSpace::LUM_CHANNEL] = lum; 00171 00172 if (itsUseColor){ // add the color spaces to the input image 00173 inImg[ScaleSpace::RG_CHANNEL] = rg; 00174 inImg[ScaleSpace::BY_CHANNEL] = by; 00175 } 00176 00177 ScaleSpace ss(inImg, octscale, nums, sigma, itsUseColor); 00178 00179 // get a bunch of keypoints out of the ScaleSpace: 00180 uint nkp = ss.findKeypoints(itsKeypoints); 00181 LDEBUG("%s: Found %d keypoints in ScaleSpace %d", 00182 itsName.c_str(), nkp, iter); 00183 numkp += nkp; 00184 00185 // get ready for next ScaleSpace: 00186 lum = decXY(ss.getTwoSigmaImage(ScaleSpace::LUM_CHANNEL)); 00187 00188 if (itsUseColor){ 00189 rg = decXY(ss.getTwoSigmaImage(ScaleSpace::RG_CHANNEL)); 00190 by = decXY(ss.getTwoSigmaImage(ScaleSpace::BY_CHANNEL)); 00191 } 00192 00193 ++ iter; octscale *= 2.0F; 00194 } 00195 00196 LDEBUG("%s: Found total of %d keypoints over all ScaleSpaces.", 00197 itsName.c_str(), numkp); 00198 } 00199 } 00200 00201 // ###################################################################### 00202 VisualObject::VisualObject(const VisualObject& vo) 00203 { 00204 itsName = vo.itsName; itsImageFname = vo.itsImageFname; 00205 if (vo.itsImage.initialized()) itsImage = vo.itsImage; else itsImage.freeMem(); 00206 itsKeypoints = vo.itsKeypoints; 00207 itsFeatures = vo.itsFeatures; 00208 itsIsSorted = vo.itsIsSorted; 00209 } 00210 00211 // ###################################################################### 00212 VisualObject::~VisualObject() 00213 { } 00214 00215 // ###################################################################### 00216 void VisualObject::deleteImageFile() const 00217 { 00218 if (Raster::fileExists(itsImageFname, RASFMT_PNG)) 00219 if (unlink(itsImageFname.c_str()) == -1) 00220 PLERROR("Could not delete '%s' -- IGNORING", itsImageFname.c_str()); 00221 } 00222 00223 // ###################################################################### 00224 VisualObject& VisualObject::operator=(const VisualObject& vo) 00225 { 00226 itsName = vo.itsName; itsImageFname = vo.itsImageFname; 00227 00228 itsImage.freeMem(); 00229 if (vo.itsImage.initialized()) itsImage = vo.itsImage; 00230 00231 itsKeypoints = vo.itsKeypoints; 00232 itsFeatures = vo.itsFeatures; 00233 itsIsSorted = vo.itsIsSorted; 00234 00235 return *this; 00236 } 00237 00238 // ###################################################################### 00239 double VisualObject::getFeatureDistSq(const rutz::shared_ptr<VisualObject>& obj) const 00240 { 00241 ASSERT(itsFeatures.size() == obj->itsFeatures.size()); 00242 00243 double distSq = 0.0; 00244 std::vector<float>::const_iterator 00245 src1 = itsFeatures.begin(), stop = itsFeatures.end(), 00246 src2 = obj->itsFeatures.begin(); 00247 00248 while (src1 != stop) 00249 { 00250 const double diff = double(*src1++) - double(*src2++); 00251 distSq += diff * diff; 00252 } 00253 00254 return distSq; 00255 } 00256 00257 // ###################################################################### 00258 void VisualObject::sortKeypoints() 00259 { 00260 if (itsIsSorted) return; // we are already sorted 00261 00262 // do the sorting: 00263 std::sort(itsKeypoints.begin(), itsKeypoints.end(), lessKP()); 00264 itsIsSorted = true; 00265 } 00266 00267 // ###################################################################### 00268 std::ostream& operator<<(std::ostream& os, const VisualObject& v) 00269 { 00270 os<<v.itsName<<std::endl<<v.itsImageFname<<std::endl; 00271 if (v.itsImageFname != "NULL" && v.itsImageFname != "" && Raster::fileExists(v.itsImageFname, RASFMT_PNG) == false) 00272 { 00273 LINFO("Writing image file: %s", v.itsImageFname.c_str()); 00274 Raster::WriteRGB(v.itsImage, v.itsImageFname, RASFMT_PNG); 00275 } 00276 00277 if (v.itsImageFname == "NULL" || v.itsImageFname == "") 00278 os<<v.itsObjectSize.w()<<std::endl<<v.itsObjectSize.h()<<std::endl; 00279 00280 os<<v.itsSalPoint.i<<std::endl<<v.itsSalPoint.j<<std::endl; 00281 const uint featureSize = v.itsFeatures.size(); 00282 os<<featureSize<<std::endl; 00283 for (uint i = 0; i < featureSize; i++) os<<v.itsFeatures[i]<<' '; 00284 00285 const uint keySize = v.itsKeypoints.size(); 00286 os<<keySize<<std::endl; 00287 for (uint i = 0; i < keySize; i++) os<<*(v.itsKeypoints[i]); 00288 00289 return os; 00290 } 00291 00292 // ###################################################################### 00293 std::istream& operator>>(std::istream& is, VisualObject& v) 00294 { 00295 00296 v.createVisualObject(is, v); 00297 return is; 00298 } 00299 00300 // ###################################################################### 00301 void VisualObject::createVisualObject 00302 (std::istream& is, VisualObject &v, bool loadImage) 00303 { 00304 is >> std::ws; 00305 std::getline(is, v.itsName); 00306 std::getline(is, v.itsImageFname); 00307 v.itsImageLoaded = loadImage; 00308 00309 // if the passed in filename is "" -> the entry is the blank 00310 // then we will go to a different entry, 00311 // the i val of salient point (an integer) 00312 // if the passed in filename is a string "NULL" we also skip 00313 uint featureSize; 00314 if (v.itsImageFname != "NULL" && v.itsImageFname != "") 00315 { 00316 // only load image when the user asked for 00317 if (loadImage) 00318 { 00319 LINFO("Opening image file %s", v.itsImageFname.c_str()); 00320 v.itsImage = Raster::ReadRGB(v.itsImageFname); 00321 } 00322 } 00323 else 00324 { 00325 LDEBUG("Image file %s not opened", v.itsImageFname.c_str()); 00326 v.itsImageFname = std::string("NULL"); 00327 int objW = 0, objH = 0; 00328 is>>objW; is>>objH; 00329 LINFO("%d %d", objW, objH); 00330 v.itsObjectSize = Dims(objW, objH); 00331 } 00332 is>>v.itsSalPoint.i; 00333 is>>v.itsSalPoint.j; 00334 00335 is>>featureSize; 00336 v.itsFeatures.clear(); v.itsFeatures.resize(featureSize); 00337 for (uint i = 0; i < featureSize; i++) is>>v.itsFeatures[i]; 00338 00339 uint keySize; is>>keySize; 00340 v.itsKeypoints.clear(); v.itsKeypoints.resize(keySize); 00341 00342 std::vector< rutz::shared_ptr<Keypoint> >::iterator 00343 k = v.itsKeypoints.begin(), stop = v.itsKeypoints.end(); 00344 00345 while (k != stop) 00346 { 00347 rutz::shared_ptr<Keypoint> newkey(new Keypoint()); 00348 is>>(*newkey); *k++ = newkey; 00349 } 00350 00351 v.itsIsSorted = 00352 myIsSorted(v.itsKeypoints.begin(), v.itsKeypoints.end(), lessKP()); 00353 } 00354 00355 // ###################################################################### 00356 Image<PixRGB<byte> > VisualObject:: 00357 getKeypointImage(const float scale, const float vmag, 00358 const PixRGB<byte> col) 00359 { 00360 std::vector<rutz::shared_ptr<Keypoint> >::const_iterator 00361 k = itsKeypoints.begin(), 00362 stop = itsKeypoints.end(); 00363 00364 Image< PixRGB<byte> > image(getImage()); 00365 if (scale != 1.0F) 00366 image = rescale(image, int(image.getWidth() * scale), 00367 int(image.getHeight() * scale)); 00368 00369 while(k != stop) 00370 { 00371 const float x = (*k)->getX() * scale; 00372 const float y = (*k)->getY() * scale; 00373 const float s = (*k)->getS() * scale * vmag; 00374 const float o = (*k)->getO(); 00375 00376 Point2D<int> loc(int(x + 0.5F), int(y + 0.5F)); 00377 drawDisk(image, loc, 2, PixRGB<byte>(255,0,0)); 00378 if (s > 0.0f) drawLine(image, loc, 00379 Point2D<int>(int(x + s * cosf(o) + 0.5F), 00380 int(y + s * sinf(o) + 0.5F)), 00381 PixRGB<byte>(255, 0, 0)); 00382 ++k; 00383 } 00384 return image; 00385 } 00386 00387 // ###################################################################### 00388 Image<PixRGB<byte> > VisualObject:: 00389 getKeypointImage2(const float scale, const float vmag, 00390 const PixRGB<byte> col) 00391 { 00392 std::vector<rutz::shared_ptr<Keypoint> >::const_iterator 00393 k = itsKeypoints.begin(), 00394 stop = itsKeypoints.end(); 00395 00396 Image< PixRGB<byte> > image(getImage()); 00397 if (scale != 1.0F) 00398 image = rescale(image, int(image.getWidth() * scale), 00399 int(image.getHeight() * scale)); 00400 00401 while(k != stop) 00402 { 00403 const float x = (*k)->getX() * scale; 00404 const float y = (*k)->getY() * scale; 00405 const float s = (*k)->getS() * scale * vmag; 00406 const float o = (*k)->getO(); 00407 00408 Point2D<int> loc(int(x + 0.5F), int(y + 0.5F)); 00409 drawDisk(image, loc, 2, PixRGB<byte>(255,0,0)); 00410 00411 if (s >= 1.0f) 00412 drawCircle(image, loc, int(s), PixRGB<byte>(255,0,0)); 00413 if (s > 0.0f) drawLine(image, loc, 00414 Point2D<int>(int(x + s * cosf(o) + 0.5F), 00415 int(y + s * sinf(o) + 0.5F)), 00416 PixRGB<byte>(255, 0, 0)); 00417 ++k; 00418 } 00419 return image; 00420 } 00421 00422 // ###################################################################### 00423 Image<PixRGB<byte> > VisualObject:: 00424 getSalAndKeypointImage(const float scale, const float vmag, 00425 const PixRGB<byte> col) 00426 { 00427 00428 Image<PixRGB<byte> > image = getKeypointImage(scale,vmag,col); 00429 Point2D<int> salpt((int)(itsSalPoint.i*scale), (int)(itsSalPoint.j*scale)); 00430 drawDisk(image, salpt, 2, PixRGB<byte>(255,255,0)); 00431 00432 return image; 00433 } 00434 00435 // ###################################################################### 00436 /* So things look consistent in everyone's emacs... */ 00437 /* Local Variables: */ 00438 /* indent-tabs-mode: nil */ 00439 /* End: */