VisualObject.C

Go to the documentation of this file.
00001 /*!@file SIFT/VisualObject.C Visual Objects to be recognized */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/SIFT/VisualObject.C $
00035 // $Id: VisualObject.C 14182 2010-10-29 01:44:29Z rand $
00036 //
00037 
00038 #include "SIFT/VisualObject.H"
00039 #include "SIFT/ScaleSpace.H"
00040 #include "Image/ColorOps.H"
00041 #include "Image/DrawOps.H"
00042 #include "Image/ShapeOps.H"
00043 #include "Image/Kernels.H"
00044 #include "Image/FilterOps.H"
00045 #include "Image/MathOps.H"
00046 #include "Image/Pixels.H"
00047 #include "Raster/Raster.H"
00048 
00049 #include <algorithm>
00050 #include <cmath>
00051 #include <istream>
00052 #include <ostream>
00053 
00054 #include <cctype>
00055 
namespace
{
  //! Return true if s is an optional leading '-' followed by digits
  /*! At least one decimal digit is required, so both the empty string
      and a lone "-" are rejected. No other sign character, whitespace
      or decimal point is accepted. */
  bool isInteger(const std::string& s)
  {
    // skip over an optional leading minus sign:
    const size_t start = (!s.empty() && s[0] == '-') ? 1 : 0;

    // we need at least one digit after the optional sign:
    if (start >= s.length()) return false;

    for (size_t i = start; i < s.length(); ++i)
      {
        // go through unsigned char: handing a negative char value to
        // isdigit() is undefined behavior
        if (!isdigit(static_cast<unsigned char>(s[i]))) return false;
      }

    return true;
  }
}
00070 // ######################################################################
00071 // functor to assist with keypoint sorting:
00072 class lessKP
00073 {
00074 public:
00075   bool operator()(const rutz::shared_ptr<Keypoint>& x,
00076                   const rutz::shared_ptr<Keypoint>& y)
00077   { return (*x) < (*y); }
00078 };
00079 
// Portable stand-in for is_sorted(), which started out as a
// non-standard SGI extension to the STL and hence is not always
// available. Adapted from
// http://lists.boost.org/MailArchives/boost/msg40406.php
//
// Returns true if no element of [begin, end) compares (through comp)
// as coming before the element that immediately precedes it. Empty
// and single-element ranges count as sorted.
template <class ForwardIterator, class StrictWeakOrdering>
bool myIsSorted(ForwardIterator begin, ForwardIterator end,
                StrictWeakOrdering comp)
{
  if (begin == end) return true;

  // walk the range with a pair of adjacent iterators:
  ForwardIterator prev = begin;
  ForwardIterator cur = begin;
  ++cur;

  while (cur != end)
    {
      // one out-of-order adjacent pair is enough to decide:
      if (comp(*cur, *prev)) return false;
      ++prev;
      ++cur;
    }

  return true;
}
00096 
00097 // ######################################################################
00098 VisualObject::VisualObject(const std::string& name,
00099                            const std::string& imagefname,
00100                            const Image< PixRGB<byte> >& image,
00101                            const Point2D<int>& salpt,
00102                            const std::vector<float>& preattfeatures,
00103                            const std::vector< rutz::shared_ptr<Keypoint> >&
00104                            keypoints,
00105                            const bool useColor,
00106                            bool computeKP) :
00107 
00108   itsName(name), itsImageFname(imagefname), itsImage(image),
00109   itsKeypoints(keypoints), itsSalPoint(salpt), itsFeatures(preattfeatures),
00110   itsIsSorted(false), itsUseColor(useColor),itsImageLoaded(true)
00111 {
00112   itsObjectSize = image.getDims();
00113   if(computeKP) computeKeypoints();
00114 }
00115 
00116 // ######################################################################
00117 void VisualObject::computeKeypoints()
00118 {
00119   // if we were given an image but no keypoints, let's extract them now:
00120   if (itsImage.initialized() && itsKeypoints.empty())
00121     {
00122       LDEBUG("%s: initializing ScaleSpace from %dx%d image...",
00123              itsName.c_str(), itsImage.getWidth(), itsImage.getHeight());
00124 
00125       // compute the luminance of the image:
00126       Image<float> lum = luminance(itsImage);
00127 
00128       // compute the opponent color space
00129       // and double the image
00130       Image<float> rg, by;
00131       if (itsUseColor){
00132         getRGBY(itsImage, rg, by, 25.0F);
00133         rg = interpolate(rg);
00134         by = interpolate(by);
00135       }
00136 
00137       // double the resolution:
00138       lum = interpolate(lum);
00139 
00140       const int nums = 3;        // recommended by David Lowe
00141       const double sigma = 1.6F; // recommended by David Lowe
00142       float octscale = 0.5F;     // since we doubled the image
00143 
00144       // To feed the first ScaleSpace in our series, apply some
00145       // initial blur so that the input image has an effective blur of
00146       // the desired sigma. We assume that the original image has a
00147       // blur of at least 0.5 by construction. Since its size has been
00148       // doubled (octscale=0.5), then that becomes 1.0. We assume that
00149       // the sigma=1.6 applies to the doubled image. Remember that the
00150       // variances add when we sequentially convolve by
00151       // Gaussians. Hence the additional blur we need is such that
00152       // sigma^2 = 1^2 + blursig^2:
00153       const float blursig = sqrtf(sigma * sigma - 1.0F);
00154       Image<float> kernel = gaussian<float>(1.0F, blursig,
00155                                             lum.getWidth(), 1.0F);
00156       kernel = kernel / float(sum(kernel));
00157       lum = sepFilter(lum, kernel, kernel, CONV_BOUNDARY_CLEAN);
00158 
00159       if (itsUseColor){
00160         // scale the color space
00161         rg = sepFilter(rg, kernel, kernel, CONV_BOUNDARY_CLEAN);
00162         by = sepFilter(by, kernel, kernel, CONV_BOUNDARY_CLEAN);
00163       }
00164 
00165       // let's do it:
00166       int iter = 0; uint numkp = 0;
00167       while (lum.getWidth() > 24 && lum.getHeight() > 24)
00168         {
00169           ImageSet<float> inImg(3);
00170           inImg[ScaleSpace::LUM_CHANNEL] = lum;
00171 
00172           if (itsUseColor){        // add the color spaces to the input image
00173             inImg[ScaleSpace::RG_CHANNEL] = rg;
00174             inImg[ScaleSpace::BY_CHANNEL] = by;
00175           }
00176 
00177           ScaleSpace ss(inImg, octscale, nums, sigma, itsUseColor);
00178 
00179           // get a bunch of keypoints out of the ScaleSpace:
00180           uint nkp = ss.findKeypoints(itsKeypoints);
00181           LDEBUG("%s: Found %d keypoints in ScaleSpace %d",
00182                  itsName.c_str(), nkp, iter);
00183           numkp += nkp;
00184 
00185           // get ready for next ScaleSpace:
00186           lum = decXY(ss.getTwoSigmaImage(ScaleSpace::LUM_CHANNEL));
00187 
00188           if (itsUseColor){
00189             rg = decXY(ss.getTwoSigmaImage(ScaleSpace::RG_CHANNEL));
00190             by = decXY(ss.getTwoSigmaImage(ScaleSpace::BY_CHANNEL));
00191           }
00192 
00193           ++ iter; octscale *= 2.0F;
00194         }
00195 
00196       LDEBUG("%s: Found total of %d keypoints over all ScaleSpaces.",
00197              itsName.c_str(), numkp);
00198     }
00199 }
00200 
00201 // ######################################################################
00202 VisualObject::VisualObject(const VisualObject& vo)
00203 {
00204   itsName = vo.itsName; itsImageFname = vo.itsImageFname;
00205   if (vo.itsImage.initialized()) itsImage = vo.itsImage; else itsImage.freeMem();
00206   itsKeypoints = vo.itsKeypoints;
00207   itsFeatures = vo.itsFeatures;
00208   itsIsSorted = vo.itsIsSorted;
00209 }
00210 
00211 // ######################################################################
00212 VisualObject::~VisualObject()
00213 {  }
00214 
00215 // ######################################################################
00216 void VisualObject::deleteImageFile() const
00217 {
00218   if (Raster::fileExists(itsImageFname, RASFMT_PNG))
00219     if (unlink(itsImageFname.c_str()) == -1)
00220       PLERROR("Could not delete '%s' -- IGNORING", itsImageFname.c_str());
00221 }
00222 
00223 // ######################################################################
00224 VisualObject& VisualObject::operator=(const VisualObject& vo)
00225 {
00226   itsName = vo.itsName; itsImageFname = vo.itsImageFname;
00227 
00228   itsImage.freeMem();
00229   if (vo.itsImage.initialized()) itsImage = vo.itsImage;
00230 
00231   itsKeypoints = vo.itsKeypoints;
00232   itsFeatures = vo.itsFeatures;
00233   itsIsSorted = vo.itsIsSorted;
00234 
00235   return *this;
00236 }
00237 
00238 // ######################################################################
00239 double VisualObject::getFeatureDistSq(const rutz::shared_ptr<VisualObject>& obj) const
00240 {
00241   ASSERT(itsFeatures.size() == obj->itsFeatures.size());
00242 
00243   double distSq = 0.0;
00244   std::vector<float>::const_iterator
00245     src1 = itsFeatures.begin(), stop = itsFeatures.end(),
00246     src2 = obj->itsFeatures.begin();
00247 
00248   while (src1 != stop)
00249     {
00250       const double diff = double(*src1++) - double(*src2++);
00251       distSq += diff * diff;
00252     }
00253 
00254   return distSq;
00255 }
00256 
00257 // ######################################################################
00258 void VisualObject::sortKeypoints()
00259 {
00260   if (itsIsSorted) return; // we are already sorted
00261 
00262   // do the sorting:
00263   std::sort(itsKeypoints.begin(), itsKeypoints.end(), lessKP());
00264   itsIsSorted = true;
00265 }
00266 
00267 // ######################################################################
00268 std::ostream& operator<<(std::ostream& os, const VisualObject& v)
00269 {
00270   os<<v.itsName<<std::endl<<v.itsImageFname<<std::endl;
00271   if (v.itsImageFname != "NULL" && v.itsImageFname != "" && Raster::fileExists(v.itsImageFname, RASFMT_PNG) == false)
00272     {
00273       LINFO("Writing image file: %s", v.itsImageFname.c_str());
00274       Raster::WriteRGB(v.itsImage, v.itsImageFname, RASFMT_PNG);
00275     }
00276 
00277   if (v.itsImageFname == "NULL" || v.itsImageFname == "")
00278     os<<v.itsObjectSize.w()<<std::endl<<v.itsObjectSize.h()<<std::endl;
00279 
00280   os<<v.itsSalPoint.i<<std::endl<<v.itsSalPoint.j<<std::endl;
00281   const uint featureSize = v.itsFeatures.size();
00282   os<<featureSize<<std::endl;
00283   for (uint i = 0; i < featureSize; i++) os<<v.itsFeatures[i]<<' ';
00284 
00285   const uint keySize = v.itsKeypoints.size();
00286   os<<keySize<<std::endl;
00287   for (uint i = 0; i < keySize; i++) os<<*(v.itsKeypoints[i]);
00288 
00289   return os;
00290 }
00291 
00292 // ######################################################################
00293 std::istream& operator>>(std::istream& is, VisualObject& v)
00294 {
00295 
00296   v.createVisualObject(is, v);
00297   return is;
00298 }
00299 
00300 // ######################################################################
00301 void VisualObject::createVisualObject
00302 (std::istream& is, VisualObject &v, bool loadImage)
00303 {
00304   is >> std::ws;
00305   std::getline(is, v.itsName);
00306   std::getline(is, v.itsImageFname);
00307   v.itsImageLoaded = loadImage;
00308 
00309   // if the passed in filename is "" -> the entry is the blank
00310   // then we will go to a different entry,
00311   // the i val of salient point (an integer)
00312   // if the passed in filename is a string "NULL" we also skip
00313   uint featureSize;
00314   if (v.itsImageFname != "NULL" && v.itsImageFname != "")
00315     {
00316       // only load image when the user asked for
00317       if (loadImage)
00318         {
00319           LINFO("Opening image file %s", v.itsImageFname.c_str());
00320           v.itsImage = Raster::ReadRGB(v.itsImageFname);
00321         }
00322     }
00323   else
00324     {
00325       LDEBUG("Image file %s not opened", v.itsImageFname.c_str());
00326       v.itsImageFname = std::string("NULL");
00327       int objW = 0, objH = 0;
00328       is>>objW; is>>objH;
00329       LINFO("%d %d", objW, objH);
00330       v.itsObjectSize = Dims(objW, objH);
00331     }
00332   is>>v.itsSalPoint.i;
00333   is>>v.itsSalPoint.j;
00334 
00335   is>>featureSize;
00336   v.itsFeatures.clear(); v.itsFeatures.resize(featureSize);
00337   for (uint i = 0; i < featureSize; i++) is>>v.itsFeatures[i];
00338 
00339   uint keySize; is>>keySize;
00340   v.itsKeypoints.clear(); v.itsKeypoints.resize(keySize);
00341 
00342   std::vector< rutz::shared_ptr<Keypoint> >::iterator
00343     k = v.itsKeypoints.begin(), stop = v.itsKeypoints.end();
00344 
00345   while (k != stop)
00346     {
00347       rutz::shared_ptr<Keypoint> newkey(new Keypoint());
00348       is>>(*newkey); *k++ = newkey;
00349     }
00350 
00351   v.itsIsSorted =
00352     myIsSorted(v.itsKeypoints.begin(), v.itsKeypoints.end(), lessKP());
00353 }
00354 
00355 // ######################################################################
00356 Image<PixRGB<byte> > VisualObject::
00357 getKeypointImage(const float scale, const float vmag,
00358                  const PixRGB<byte> col)
00359 {
00360   std::vector<rutz::shared_ptr<Keypoint> >::const_iterator
00361     k = itsKeypoints.begin(),
00362     stop = itsKeypoints.end();
00363 
00364   Image< PixRGB<byte> > image(getImage());
00365   if (scale != 1.0F)
00366     image = rescale(image, int(image.getWidth() * scale),
00367                     int(image.getHeight() * scale));
00368 
00369   while(k != stop)
00370     {
00371       const float x = (*k)->getX() * scale;
00372       const float y = (*k)->getY() * scale;
00373       const float s = (*k)->getS() * scale * vmag;
00374       const float o = (*k)->getO();
00375 
00376       Point2D<int> loc(int(x + 0.5F), int(y + 0.5F));
00377       drawDisk(image, loc, 2, PixRGB<byte>(255,0,0));
00378       if (s > 0.0f) drawLine(image, loc,
00379                              Point2D<int>(int(x + s * cosf(o) + 0.5F),
00380                                      int(y + s * sinf(o) + 0.5F)),
00381                              PixRGB<byte>(255, 0, 0));
00382       ++k;
00383     }
00384   return image;
00385 }
00386 
00387 // ######################################################################
00388 Image<PixRGB<byte> > VisualObject::
00389 getKeypointImage2(const float scale, const float vmag,
00390                   const PixRGB<byte> col)
00391 {
00392   std::vector<rutz::shared_ptr<Keypoint> >::const_iterator
00393     k = itsKeypoints.begin(),
00394     stop = itsKeypoints.end();
00395 
00396   Image< PixRGB<byte> > image(getImage());
00397   if (scale != 1.0F)
00398     image = rescale(image, int(image.getWidth() * scale),
00399                     int(image.getHeight() * scale));
00400 
00401   while(k != stop)
00402     {
00403       const float x = (*k)->getX() * scale;
00404       const float y = (*k)->getY() * scale;
00405       const float s = (*k)->getS() * scale * vmag;
00406       const float o = (*k)->getO();
00407 
00408       Point2D<int> loc(int(x + 0.5F), int(y + 0.5F));
00409       drawDisk(image, loc, 2, PixRGB<byte>(255,0,0));
00410 
00411       if (s >= 1.0f)
00412         drawCircle(image, loc, int(s), PixRGB<byte>(255,0,0));
00413       if (s > 0.0f) drawLine(image, loc,
00414                              Point2D<int>(int(x + s * cosf(o) + 0.5F),
00415                                      int(y + s * sinf(o) + 0.5F)),
00416                              PixRGB<byte>(255, 0, 0));
00417       ++k;
00418     }
00419   return image;
00420 }
00421 
00422 // ######################################################################
00423 Image<PixRGB<byte> > VisualObject::
00424 getSalAndKeypointImage(const float scale, const float vmag,
00425                        const PixRGB<byte> col)
00426 {
00427 
00428   Image<PixRGB<byte> > image = getKeypointImage(scale,vmag,col);
00429   Point2D<int> salpt((int)(itsSalPoint.i*scale), (int)(itsSalPoint.j*scale));
00430   drawDisk(image, salpt, 2, PixRGB<byte>(255,255,0));
00431 
00432   return image;
00433 }
00434 
00435 // ######################################################################
00436 /* So things look consistent in everyone's emacs... */
00437 /* Local Variables: */
00438 /* indent-tabs-mode: nil */
00439 /* End: */