/*!@file ObjRec/ObjRecSPM.C Object recognition using the spatial pyramid
   matching algorithm from Lazebnik, Schmid and Ponce */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/ObjRec/ObjRecSPM.C $
// $Id: ObjRecSPM.C 10794 2009-02-08 06:21:09Z itti $
//
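// A minimal usage sketch (illustrative only, not compiled as part of this
// file). It assumes the standard ModelManager / Raster setup used by the
// toolkit's test programs; the image files and the "myObject" label are
// placeholders:
//
//   #include "Component/ModelManager.H"
//   #include "Raster/Raster.H"
//   #include "Util/log.H"
//   #include "ObjRec/ObjRecSPM.H"
//
//   int main(int argc, const char** argv)
//   {
//     ModelManager manager("ObjRecSPM example");
//     nub::ref<ObjRecSPM> objRec(new ObjRecSPM(manager, "ObjRecSPM", "ObjRecSPM"));
//     manager.addSubComponent(objRec);
//     if (manager.parseCommandLine(argc, argv, "", 0, 0) == false) return 1;
//     manager.start();
//
//     // train on one labeled image, then predict on another
//     objRec->train(Raster::ReadRGB("train.png"), "myObject");
//     objRec->finalizeTraining();
//     LINFO("Predicted: %s", objRec->predict(Raster::ReadRGB("test.png")).c_str());
//
//     manager.stop();
//     return 0;
//   }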
#include "Component/OptionManager.H"
#include "Image/ColorOps.H"
#include "Image/FilterOps.H"
#include "Image/MathOps.H"
#include "Image/Kernels.H"
#include "Image/Convolutions.H"
#include "GUI/DebugWin.H"
#include "ObjRec/ObjRecSPM.H"
#include "SIFT/FeatureVector.H"

// ######################################################################
ObjRecSPM::ObjRecSPM(OptionManager& mgr, const std::string& descrName,
                     const std::string& tagName) :
  ModelComponent(mgr, descrName, tagName),
  itsNumOri(4),
  itsNumScales(2),
  itsNumOriArray(36), // number of bins in the SIFT orientation histogram
  itsObjects(0),
  itsUseSaliency(false)
{
  float stdmin  = 1.75F;
  float stdstep = 0.5F;
  int   fsmin   = 3;
  int   fsstep  = 1;

  // create the filters
  for (int scale = 0; scale < itsNumScales; scale++)
    for (int ori = 0; ori < itsNumOri; ori++)
    {
      Image<float> filter = dogFilter<float>(stdmin + stdstep * scale,
                                             (float)ori * 180.0F / (float)itsNumOri,
                                             fsmin + fsstep * scale);

      // normalize to zero mean:
      filter -= mean(filter);

      // normalize to unit sum-of-squares:
      filter /= sum(squared(filter));

      itsFilters.push_back(filter);
    }

  if (itsUseSaliency)
  {
    itsGetSaliency = nub::soft_ref<GetSaliency>(new GetSaliency(mgr));
    addSubComponent(itsGetSaliency);
  }
}

void ObjRecSPM::start2()
{
}

ObjRecSPM::~ObjRecSPM()
{
}

void ObjRecSPM::train(const Image<PixRGB<byte> > &img, const std::string label)
{
  Image<float> input = luminance(img); // convert to grayscale

  if (itsUseSaliency)
  {
    Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
    itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
    Image<float> smap = itsGetSaliency->getSalmap();
    smap = rescale(smap, img.getDims());
    inplaceNormalize(smap, 0.0F, 1.0F);
    input *= smap; // weight the input by the saliency map
  }

  //Descriptor desc = extractFeatures(input);
  Descriptor desc = extractSiftFeatures(input);

  uint objId = getObject(label);

  itsObjects[objId].model.push_back(desc);
}

void ObjRecSPM::finalizeTraining()
{
  LINFO("Training done");

  for (uint i = 0; i < itsObjects.size(); i++)
    for (uint j = 0; j < itsObjects[i].model.size(); j++)
    {
      printf("Obj %u model %u size %" ZU "\n",
             i, j, itsObjects[i].model[j].featureLevelHist.size());
    }
}

uint ObjRecSPM::getObject(const std::string name)
{
  // Find the object
  // TODO: could use a hash function here
  uint i = 0;
  for (i = 0; i < itsObjects.size(); i++)
    if (itsObjects[i].name == name)
      return i;

  // Object not found; create a new one
  Object obj;
  obj.id = i;
  obj.name = name;
  obj.model.clear();

  itsObjects.push_back(obj);

  return i;
}
ObjRecSPM::Descriptor ObjRecSPM::extractFeatures(const Image<float> &input)
{
  double normSum = 0; // the normalization constant

  // Compute the various low-level features
  ImageSet<float> featuresValues(itsFilters.size());
  for (uint i = 0; i < itsFilters.size(); i++)
  {
    Image<float> tmp = convolve(input, itsFilters[i], CONV_BOUNDARY_CLEAN); // no normalization
    //Image<float> tmp = convolveHmax(input, itsFilters[i]); // normalize by image energy
    tmp = abs(tmp);
    normSum += sum(tmp);
    featuresValues[i] = tmp;
  }

  // get the histograms
  Descriptor desc;
  for (uint feature = 0; feature < featuresValues.size(); feature++) // for each feature
  {
    Image<float> featureVal = featuresValues[feature];

    std::vector<Histogram> levelHists;
    for (int level = 0; level < 4; level++)
    {
      int levelSize = 1 << level; // the grid is levelSize x levelSize at this level
      Histogram hist(levelSize*levelSize);

      int xSpace = (featureVal.getWidth()/levelSize)+1;
      int ySpace = (featureVal.getHeight()/levelSize)+1;

      for (int y = 0; y < featureVal.getHeight(); y++)
        for (int x = 0; x < featureVal.getWidth(); x++)
        {
          // bin index = grid column + grid row * grid width
          int binPos = (int)(x/xSpace + levelSize*(y/ySpace));
          hist.addValue(binPos, featureVal.getVal(x,y));
        }
      hist.normalize(normSum); // normalize by the sum of all feature outputs
      levelHists.push_back(hist);
    }
    desc.featureLevelHist.push_back(levelHists);
  }

  return desc;
}
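// Worked example of the grid arithmetic above (illustrative numbers): for a
// 640x480 response image at level 2 the grid is 4x4, so
//   xSpace = 640/4 + 1 = 161  and  ySpace = 480/4 + 1 = 121,
// and the pixel at (x,y) = (300,200) falls in column 300/161 = 1 and
// row 200/121 = 1, i.e. bin 1 + 4*1 = 5 of the 16 level-2 bins.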
ObjRecSPM::Descriptor ObjRecSPM::extractSiftFeatures(const Image<float> &input)
{
  SHOWIMG(input);

  // extract a SIFT descriptor on a dense grid, every 10 pixels
  Descriptor desc;
  for (int y = 10; y < input.getHeight()-10; y += 10)
    for (int x = 10; x < input.getWidth()-10; x += 10)
    {
      SiftKeypoint kp;
      kp.x = x;
      kp.y = y;
      kp.fv = getSiftDescriptor(input, x, y, 2);
      desc.siftDescriptors.push_back(kp);
      LINFO("%ix%i", x, y);
      //for(uint i=0; i<fv.size(); i++)
      //{
      //  FeatureVector fvtmp;
      //  SHOWIMG(fvtmp.getFeatureVectorImage(fv[i]));
      //}
    }

  return desc;
}

std::string ObjRecSPM::predict(const Image<PixRGB<byte> > &img)
{
  Image<float> input = luminance(img); // convert to grayscale

  if (itsUseSaliency)
  {
    Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
    itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
    Image<float> smap = itsGetSaliency->getSalmap();
    smap = rescale(smap, img.getDims());
    inplaceNormalize(smap, 0.0F, 1.0F);
    input *= smap; // weight the input by the saliency map
  }

  //Descriptor desc = extractFeatures(input);
  Descriptor desc = extractSiftFeatures(input);

  // find matches

  // Descriptor objDec = itsObjects[0].model[0];

  for (uint kp_i = 0; kp_i < desc.siftDescriptors.size(); kp_i++)
  {
    SiftKeypoint kp = desc.siftDescriptors[kp_i];

    for (uint fv_i = 0; fv_i < kp.fv.size(); fv_i++)
    {
      std::vector<byte> fv = kp.fv[fv_i];
      FeatureVector tmpFv;

      SHOWIMG(tmpFv.getFeatureVectorImage(fv));
    }
  }

  int objId = findObject(desc);

  if (objId != -1)
    return itsObjects[objId].name;

  return std::string("Unknown");
}

// NN search
int ObjRecSPM::findObject(const Descriptor &desc)
{
  int objId = -1;

  double minDist = std::numeric_limits<double>::max();
  // Find the object with the closest distance
  for (uint i = 0; i < itsObjects.size(); i++)
  {
    // Find the closest model match to the data
    for (uint j = 0; j < itsObjects[i].model.size(); j++)
    {
      double dist = matchDescriptor(itsObjects[i].model[j], desc);
      if (dist < minDist)
      {
        minDist = dist;
        objId = i;
      }
    }
  }

  return objId;
}

double ObjRecSPM::matchDescriptor(const Descriptor &descA, const Descriptor &descB)
{
  double sum = 0;
  for (uint feature = 0; feature < descA.featureLevelHist.size(); feature++)
  {
    sum += matchKernel(descA.featureLevelHist[feature], descB.featureLevelHist[feature]);
  }

  return sum;
}

double ObjRecSPM::matchKernel(const std::vector<Histogram>& A, const std::vector<Histogram>& B)
{
  if (B.size() > A.size())
    LFATAL("Incompatible histograms");
  double dist = 0;

  // Weighted Euclidean distance between the per-level histograms
  for (uint level = 0; level < A.size(); level++)
  {
    Histogram modelHist = A[level];
    Histogram testHist  = B[level];

    double weight;
    if (level == 0)
      weight = 1.0F/(double)(1<<(A.size()));
    else
      weight = 1.0F/(double)(1<<(A.size()-level+1));
    dist += weight*modelHist.getDistance(testHist);
  }

  // histogram intersection
  //for(uint i=0; i<A.size(); i++)
  //  dist += std::min(A[i],B[i]);

  // KL distance
  //for(uint i=0; i<A.size(); i++)
  //{
  //  // Ensure that no probability is set to 0
  //  double pA = (A[i] > 0) ? A[i] : 0.0001;
  //  double qB = (B[i] > 0) ? B[i] : 0.0001;
  //
  //  dist += pA * log(pA/qB);
  //}

  return dist;
}
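// The level weights computed above follow the geometric weighting of spatial
// pyramid matching: coarse levels count less than fine ones. For the
// four-level pyramid built by extractFeatures() (A.size() == 4) the loop
// yields
//   level 0 (1x1 grid): 1/2^4 = 1/16,   level 1 (2x2): 1/2^4 = 1/16,
//   level 2 (4x4 grid): 1/2^3 = 1/8,    level 3 (8x8): 1/2^2 = 1/4,
// so the finest grid contributes most to the distance.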
std::vector<std::vector<byte> > ObjRecSPM::getSiftDescriptor(const Image<float> &lum,
    const float x, const float y, const float s)
{
  Image<float> mag, ori;
  gradientSobel(lum, mag, ori, 3);

  Histogram OV(36);

  // 1. Calculate main orientation vector
  calculateOrientationVector(x, y, s, mag, ori, OV);

  //Image<byte> histImg = OV.getHistogramImage(256, 256, 0, -1);

  // 2. Create feature vector and keypoint for each significant
  //    orientation peak:
  return createVectorsAndKeypoints(x, y, s, mag, ori, OV);
}

void ObjRecSPM::calculateOrientationVector(const float x, const float y, const float s,
    const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
{
  // compute the effective blurring sigma corresponding to the
  // fractional scale s:
  const float sigma = s;

  const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig);
  const int dimX = gradmag.getWidth(), dimY = gradmag.getHeight();

  const int xi = int(x + 0.5f);
  const int yi = int(y + 0.5f);

  const int rad = int(3.0F * sig);
  const int rad2 = rad * rad;

  // search bounds:
  int starty = yi - rad; if (starty < 0) starty = 0;
  int stopy  = yi + rad; if (stopy >= dimY) stopy = dimY-1;

  // 1. Calculate orientation vector
  for (int ind_y = starty; ind_y <= stopy; ind_y ++)
  {
    // given that y, get the corresponding range of x values that
    // lie within the disk (and remain within the image):
    const int yoff = ind_y - yi;
    const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F);
    int startx = xi - bound; if (startx < 0) startx = 0;
    int stopx  = xi + bound; if (stopx >= dimX) stopx = dimX-1;

    for (int ind_x = startx; ind_x <= stopx; ind_x ++)
    {
      const float dx = float(ind_x) - x, dy = float(ind_y) - y;
      const float distSq = dx * dx + dy * dy;

      // get gradient:
      const float gradVal = gradmag.getVal(ind_x, ind_y);

      // compute the gaussian weight for this histogram entry:
      const float gaussianWeight = expf(distSq * inv2sig2);

      // add this orientation to the histogram
      // [-pi ; pi] -> [0 ; 2pi]
      float angle = gradorie.getVal(ind_x, ind_y) + M_PI;

      // [0 ; 2pi] -> [0 ; 36]
      angle = 0.5F * angle * itsNumOriArray / M_PI;
      while (angle < 0.0F) angle += itsNumOriArray;
      while (angle >= itsNumOriArray) angle -= itsNumOriArray;

      OV.addValueInterp(angle, gaussianWeight * gradVal);
    }
  }

  // smooth the orientation histogram 3 times:
  for (int i = 0; i < 3; i++) OV.smooth();
}
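// Concrete numbers for the orientation window above, for the scale s = 2 that
// extractSiftFeatures() passes in: sig = 1.5*2 = 3, so gradients are pooled
// over a disk of radius rad = int(3*3) = 9 pixels around (x,y), each sample
// weighted by exp(-d^2/(2*3^2)) times its gradient magnitude. A gradient
// orientation of -pi/2 maps to (-pi/2 + pi)*36/(2*pi) = bin 9 of the 36-bin
// histogram.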
// ######################################################################
std::vector<std::vector<byte> > ObjRecSPM::createVectorsAndKeypoints(const float x,
    const float y, const float s,
    const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
{
  const float sigma = s; //itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3));

  // find the max in the histogram:
  float maxPeakValue = OV.findMax();

  const int xi = int(x + 0.5f);
  const int yi = int(y + 0.5f);

  uint numkp = 0;

  std::vector<std::vector<byte> > descriptor;

  // 2. Create feature vector and keypoint for each significant
  //    orientation peak:
  for (int bin = 0; bin < itsNumOriArray; bin++)
  {
    // consider the peak centered around 'bin':
    const float midval = OV.getValue(bin);

    // if current value much smaller than global peak, forget it:
    if (midval < 0.8F * maxPeakValue) continue;

    // get value to the left of current value
    const float leftval = OV.getValue((bin == 0) ? itsNumOriArray-1 : bin-1);

    // get value to the right of current value
    const float rightval = OV.getValue((bin == itsNumOriArray-1) ? 0 : bin+1);

    // only consider local peaks:
    if (leftval > midval) continue;
    if (rightval > midval) continue;

    // interpolate the values to get the orientation of the peak:
    // with f(x) = ax^2 + bx + c
    //   f(-1) = x0 = leftval
    //   f( 0) = x1 = midval
    //   f(+1) = x2 = rightval
    //   => a = (x0+x2)/2 - x1
    //      b = (x2-x0)/2
    //      c = x1
    //   f'(x) = 0 => x = -b/2a
    const float a = 0.5f * (leftval + rightval) - midval;
    const float b = 0.5f * (rightval - leftval);
    float realangle = float(bin) - 0.5F * b / a;

    realangle *= 2.0F * M_PI / itsNumOriArray; // [0:36] to [0:2pi]
    realangle -= M_PI;                         // [0:2pi] to [-pi:pi]

    // ############ Create keypoint:

    // compute the feature vector:
    FeatureVector fv;

    const float sinAngle = sin(realangle), cosAngle = cos(realangle);

    // check this scale
    const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8?
    const float gausssig = float(radius);        // 1/2 width of descriptor window
    const float gaussfac = - 0.5F / (gausssig * gausssig);

    // Scan a window of diameter 2*radius+1 around the point of
    // interest, and cumulate local samples into a 4x4 grid
    // of bins, with interpolation. NOTE: rx and ry loop over a
    // square that is assumed centered around the point of interest
    // and rotated to the gradient orientation (realangle):

    int scale = abs(int(s));
    scale = scale > 5 ? 5 : scale;

    for (int ry = -radius; ry <= radius; ry++)
      for (int rx = -radius; rx <= radius; rx++)
      {
        // rotate the point:
        const float newX = rx * cosAngle - ry * sinAngle;
        const float newY = rx * sinAngle + ry * cosAngle;

        // get the coords in the image frame of reference:
        const float orgX = newX + float(xi);
        const float orgY = newY + float(yi);

        // if outside the image, forget it:
        if (gradmag.coordsOk(orgX, orgY) == false) continue;

        // find the fractional coords of the corresponding bin
        // (we subdivide our window into a 4x4 grid of bins):
        const float xf = 2.0F + 2.0F * float(rx) / float(radius);
        const float yf = 2.0F + 2.0F * float(ry) / float(radius);

        // find the Gaussian weight from distance to center and
        // get weighted gradient magnitude:
        const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac);
        const float weightedMagnitude =
          gaussFactor * gradmag.getValInterp(orgX, orgY);

        // get the gradient orientation relative to the keypoint
        // orientation and scale it for 8 orientation bins:
        float gradAng = gradorie.getValInterp(orgX, orgY) - realangle;

        // wrap the relative orientation into [-M_PI, M_PI):
        gradAng = fmod(gradAng, 2*M_PI);        // now in (-2*M_PI, 2*M_PI)
        if (gradAng < 0.0) gradAng += 2*M_PI;   // now in [0, 2*M_PI)
        if (gradAng >= M_PI) gradAng -= 2*M_PI; // now in [-M_PI, M_PI)

        // split into eight bins
        const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);

        /*
        //reflect the angle to convert from 0 to M_PI
        if (gradAng >= M_PI) gradAng-=M_PI;
        //split to four bins
        const float orient = (gradAng + M_PI) * 4 / (2 * M_PI);
        */

        // will be interpolated into 2 x 2 x 2 bins:
        fv.addValue(xf, yf, orient, weightedMagnitude);
      }

    // normalize, clamp, scale and convert to byte:
    std::vector<byte> oriVec;
    fv.toByteKey(oriVec);

    // keep this keypoint's descriptor if it is non-empty:
    double mag = fv.getMag();
    if (oriVec.size() > 0 && mag > 0)
      descriptor.push_back(oriVec);

    ++ numkp;
  }
  return descriptor;
}
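// Worked example of the peak interpolation above (illustrative values): with
// leftval = 10, midval = 20, rightval = 14 the parabola fit gives
//   a = 0.5*(10 + 14) - 20 = -8  and  b = 0.5*(14 - 10) = 2,
// so the peak sits at bin - 0.5*b/a = bin + 0.125 of the 36-bin histogram,
// which is then mapped to [-pi,pi) to give the keypoint orientation.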
// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */