ObjRecBOF.C

Go to the documentation of this file.
00001 /*!@file ObjRec/ObjRecBOF.C  ObjRec using bag of features */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/ObjRec/ObjRecBOF.C $
00035 // $Id: ObjRecBOF.C 13716 2010-07-28 22:07:03Z itti $
00036 //
00037 
00038 #include "ObjRec/ObjRecBOF.H"
00039 #include "Component/OptionManager.H"
00040 #include "Image/ColorOps.H"
00041 #include "GUI/DebugWin.H"
00042 #include "Image/DrawOps.H"
00043 #include "Image/FilterOps.H"
00044 #include "Image/ImageSet.H"
00045 #include "Image/Kernels.H"
00046 #include "Image/MathOps.H"
00047 #include <dirent.h>
00048 
00049 
00050 
00051 // ######################################################################
00052 ObjRecBOF::ObjRecBOF(OptionManager& mgr, const std::string& descrName,
00053     const std::string& tagName) :
00054   ModelComponent(mgr, descrName, tagName),
00055   itsNumOriArray(36)
00056 {
00057 
00058 }
00059 
00060 void ObjRecBOF::start2()
00061 {
00062 
00063 
00064 }
00065 
00066 ObjRecBOF::~ObjRecBOF()
00067 {
00068 }
00069 
00070 
00071 void ObjRecBOF::train(const Image<PixRGB<byte> > &img, const std::string label)
00072 {
00073 
00074 }
00075 
00076 void ObjRecBOF::train(const std::string &name, int cls)
00077 {
00078 
00079   //getSaliencyKeypoints(name.c_str());
00080   //getSIFTKeypoints(name.c_str());
00081   getIlabSIFTKeypoints(name.c_str());
00082 
00083 }
00084 
00085 void ObjRecBOF::getSaliencyKeypoints(const std::string &name)
00086 {
00087 
00088   char filename[255];
00089   const char* dirname = "/lab/tmpib/u/objRec/bof/salBayes/fv";
00090   snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str());
00091   Object obj;
00092   obj.name = name;
00093   obj.keypoints = readSaliencyKeypoints(filename); //read the keypoints
00094   if (obj.keypoints.size() > 0)
00095     itsObjects.push_back(obj);
00096 }
00097 
00098 void ObjRecBOF::getSIFTKeypoints(const std::string &name)
00099 {
00100 
00101   char filename[255];
00102   const char* dirname = "/lab/tmpib/u/objRec/bof/siftDemoV4/voc/keyp";
00103   snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str());
00104   Object obj;
00105   obj.name = name;
00106   obj.keypoints = readSIFTKeypoints(filename); //read the keypoints
00107   if (obj.keypoints.size() > 0)
00108     itsObjects.push_back(obj);
00109 }
00110 
00111 void ObjRecBOF::getIlabSIFTKeypoints(const std::string &name)
00112 {
00113 
00114   char filename[255];
00115   const char* dirname = "/home/elazary/images/VOCdevkit/VOC2007/ilabSift";
00116   snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str());
00117   Object obj;
00118   obj.name = name;
00119   obj.keypoints = readIlabSIFTKeypoints(filename); //read the keypoints
00120   if (obj.keypoints.size() > 0)
00121     itsObjects.push_back(obj);
00122 }
00123 
00124 void ObjRecBOF::finalizeTraining()
00125 {
00126   getCodeWords(200);
00127   printCodeWords();
00128 
00129 }
00130 
00131 void ObjRecBOF::finalizeTesting()
00132 {
00133   assignCodeWords();
00134   printAssignedCodeWords();
00135 }
00136 
00137 void ObjRecBOF::getObjCodeWords(const std::string &name)
00138 {
00139   char filename[255];
00140   const char* dirname = "/home/elazary/images/VOCdevkit/VOC2007/ilabSift";
00141   snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str());
00142   Object obj;
00143   obj.name = name;
00144   obj.keypoints = readIlabSIFTKeypoints(filename); //read the keypoints
00145   if (obj.keypoints.size() > 0)
00146   {
00147     for(uint kp=0; kp<obj.keypoints.size(); kp++)
00148     {
00149       int codeWord = assignCodeWord(obj.keypoints[kp].fv);
00150       obj.keypoints[kp].codeWord = codeWord;
00151     }
00152   }
00153 
00154   snprintf(filename, sizeof(filename), "%s.dat", obj.name.c_str());
00155   FILE* fp = fopen(filename, "w");
00156   if (!fp)
00157     LFATAL("Error writing %s", obj.name.c_str());
00158 
00159   for(uint kp=0; kp<obj.keypoints.size(); kp++)
00160   {
00161     fprintf(fp, "%f %f %i ",
00162         obj.keypoints[kp].x,
00163         obj.keypoints[kp].y,
00164         obj.keypoints[kp].codeWord);
00165     //for(uint i=0; i<itsObjects[obj].keypoints[kp].fv.size(); i++)
00166     //  fprintf(fp, "%f ", itsObjects[obj].keypoints[kp].fv[i]);
00167     fprintf(fp, "\n");
00168   }
00169   fclose(fp);
00170 
00171 }
00172 
00173 Image<float> ObjRecBOF::extractFeatures(const Image<PixRGB<byte> > &img)
00174 {
00175   extractSIFTFeatures(img);
00176   //extractGaborFeatures(img);
00177 
00178   return Image<float>();
00179 }
00180 
00181 void ObjRecBOF::extractGaborFeatures(const Image<PixRGB<byte> > &img)
00182 {
00183 
00184   int itsNumOri = 8;
00185   int itsNumScales = 2;
00186 
00187   float stdmin = 1.75F;
00188   float stdstep = 0.5F;
00189   int fsmin = 3;
00190   int fsstep = 1;
00191 
00192   ImageSet<float> itsFilters;
00193   //create the filters
00194   for(int scale = 0; scale < itsNumScales; scale++)
00195     for(int ori = 0; ori < itsNumOri; ori++)
00196     {
00197 
00198       Image<float> filter = dogFilter<float>(stdmin + stdstep * scale,
00199           (float)ori * 180.0F / (float)itsNumOri,
00200           fsmin + fsstep * scale);
00201 
00202       //  fsmin + fsstep * scale);
00203       // normalize to zero mean:
00204       filter -= mean(filter);
00205 
00206       // normalize to unit sum-of-squares:
00207      // filter /= sum(squared(filter));
00208 
00209       itsFilters.push_back(filter);
00210     }
00211 
00212 
00213   double normSum = 0; //the numalization constant
00214   Image<float> input = luminance(img); //convert to gray
00215   //Compute the various low level features
00216   ImageSet<float> featuresValues(itsFilters.size());
00217   for(uint i=0; i<itsFilters.size(); i++)
00218   {
00219     Image<float> tmp = convolve(input, itsFilters[i], CONV_BOUNDARY_CLEAN); //No normalization
00220     //Image<float> tmp = convolveHmax(input, itsFilters[i]); // normalize by image energy
00221     tmp = abs(tmp);
00222     normSum += sum(tmp);
00223     featuresValues[i] = tmp;
00224   }
00225 
00226 
00227   for(uint feature=0; feature<featuresValues.size(); feature++) //for each feature m
00228   {
00229     Image<float> featureVal = featuresValues[feature];
00230 
00231     std::vector<Histogram> levelHists;
00232     for(int level = 0; level < 4; level++)
00233     {
00234       int levelSize = 1<<level;
00235       Histogram hist(levelSize*levelSize);
00236 
00237       int xSpace = (featureVal.getWidth()/levelSize) + 1;
00238       int ySpace = (featureVal.getHeight()/levelSize) + 1;
00239 
00240       for(int y=0; y<featureVal.getHeight(); y++)
00241         for(int x=0; x<featureVal.getWidth(); x++)
00242         {
00243           int binPos = (int)(x/xSpace + levelSize*(y/ySpace));
00244           hist.addValue(binPos, featureVal.getVal(x,y));
00245         }
00246       hist.normalize(normSum); //normalize across the sum of all feature outputs
00247       levelHists.push_back(hist);
00248     }
00249 
00250     //print the histograms
00251     printf("%i %i ",feature, (int)levelHists.size());
00252     for(uint h=0; h<levelHists.size(); h++)
00253     {
00254       Histogram hist = levelHists[h];
00255       for(uint i=0; i<hist.size(); i++)
00256         printf("%f ", hist[i]);
00257     }
00258     printf("\n");
00259 
00260   }
00261 
00262 
00263 
00264 }
00265 
00266 void ObjRecBOF::extractSIFTFeatures(const Image<PixRGB<byte> > &img)
00267 {
00268 
00269   Image<float> input = luminance(img); //convert to gray
00270   LINFO("Get sift vector");
00271   for(int y=10; y<input.getHeight()-10; y+=10)
00272     for(int x=10; x<input.getWidth()-10; x+=10)
00273     {
00274       std::vector<std::vector<byte> >  sd = getSiftDescriptor(input, x,y,2);
00275       for(uint j=0; j<sd.size(); j++)
00276       {
00277         printf("%i %i ", x, y);
00278         for(uint i=0; i<sd[j].size(); i++)
00279         {
00280           printf("%i ", sd[j][i]);
00281         }
00282         printf("\n");
00283       }
00284     }
00285 
00286 }
00287 
00288 
00289 void ObjRecBOF::extractCodeWords(const char* dirname)
00290 {
00291 
00292   Image<float> debugImg(256,256, ZEROS);
00293   readSaliencyFeatures(dirname);
00294 
00295   //
00296   for(uint obj=0; obj<itsObjects.size(); obj++)
00297     for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++)
00298     {
00299       float x = itsObjects[obj].keypoints[kp].fv[0];
00300       float y = itsObjects[obj].keypoints[kp].fv[1];
00301       //float val = debugImg.getVal((int)x,(int)y);
00302       debugImg.setVal((int)x,(int)y,128.0);
00303     }
00304 
00305   //printFeatures();
00306   getCodeWords(200);
00307   //readCodeWords("/lab/tmpib/u/objRec/salBayes/fv/codeWords.dat");
00308   //printCodeWords();
00309   for(uint j=0; j<itsCodeWords.size(); j++)
00310   {
00311       float x = itsCodeWords[j][0];
00312       float y = itsCodeWords[j][1];
00313       //float val = debugImg.getVal((int)x,(int)y);
00314       drawCircle(debugImg, Point2D<int>((int)x,(int)y), 3, 255.0F);
00315   }
00316   SHOWIMG(debugImg);
00317 
00318   //assignCodeWords();
00319   //printAssignedCodeWords();
00320 
00321 }
00322 
00323 void ObjRecBOF::printFeatures()
00324 {
00325 
00326   for (uint i=0; i<itsObjects[0].keypoints.size(); i++)
00327   {
00328     printf("%i %f %f ", i,
00329         itsObjects[0].keypoints[i].x,
00330         itsObjects[0].keypoints[i].y);
00331 
00332     std::vector<double> fv = itsObjects[0].keypoints[i].fv;
00333     for (uint j=0; j<fv.size(); j++)
00334       printf("%f ", fv[j]);
00335     printf("\n");
00336   }
00337 
00338 }
00339 
00340 void ObjRecBOF::printCodeWords()
00341 {
00342 
00343   for(uint j=0; j<itsCodeWords.size(); j++)
00344   {
00345     for(uint i=0; i<itsCodeWords[j].size(); i++)
00346       printf("%f ", itsCodeWords[j][i]);
00347     printf("\n");
00348   }
00349 }
00350 
00351 void ObjRecBOF::printAssignedCodeWords()
00352 {
00353   for(uint obj=0; obj<itsObjects.size(); obj++)
00354   {
00355     char filename[255];
00356     snprintf(filename, sizeof(filename), "%s.dat", itsObjects[obj].name.c_str());
00357     FILE* fp = fopen(filename, "w");
00358     if (!fp)
00359       LFATAL("Error writing %s", itsObjects[obj].name.c_str());
00360 
00361     for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++)
00362     {
00363       fprintf(fp, "%f %f %i ",
00364           itsObjects[obj].keypoints[kp].x,
00365           itsObjects[obj].keypoints[kp].y,
00366           itsObjects[obj].keypoints[kp].codeWord);
00367       //for(uint i=0; i<itsObjects[obj].keypoints[kp].fv.size(); i++)
00368       //  fprintf(fp, "%f ", itsObjects[obj].keypoints[kp].fv[i]);
00369       fprintf(fp, "\n");
00370     }
00371     fclose(fp);
00372   }
00373 }
00374 
00375 void ObjRecBOF::assignCodeWords()
00376 {
00377   for(uint obj=0; obj<itsObjects.size(); obj++)
00378     for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++)
00379     {
00380       int codeWord = assignCodeWord(itsObjects[obj].keypoints[kp].fv);
00381       itsObjects[obj].keypoints[kp].codeWord = codeWord;
00382     }
00383 }
00384 
00385 int ObjRecBOF::assignCodeWord(const std::vector<double> &fv)
00386 {
00387   std::vector<double>::const_iterator s = fv.begin();
00388   int k_best = 0;
00389   double min_dist = std::numeric_limits<double>::max();
00390 
00391   //find the center with the lowest distance to this data vector
00392   for(uint k=0; k<itsCodeWords.size(); k++)
00393   {
00394     std::vector<double>::iterator c = itsCodeWords[k].begin(); //the mean vector for this label
00395     double dist = 0;
00396 
00397     uint j = 0;
00398     for(; j<= itsCodeWords[k].size() - 4; j += 4)
00399     {
00400       double t0 = c[j] - s[j];
00401       double t1 = c[j+1] - s[j+1];
00402       dist += t0*t0 + t1*t1;
00403 
00404       t0 = c[j+2] - s[j+2];
00405       t1 = c[j+3] - s[j+3];
00406       dist += t0*t0 + t1*t1;
00407     }
00408 
00409     for( ; j < itsCodeWords[k].size(); j++ )
00410     {
00411       double t = c[j] - s[j];
00412       dist += t*t;
00413     }
00414 
00415     if (min_dist > dist)
00416     {
00417       min_dist = dist;
00418       k_best = k;
00419     }
00420   }
00421 
00422   return k_best;
00423 }
00424 
00425 void ObjRecBOF::readCodeWords(const char* filename)
00426 {
00427   LINFO("Reading %s", filename);
00428   FILE *fp;
00429   fp = fopen(filename, "r");
00430   if (!fp)
00431     LFATAL("Error reading %s\n", filename);
00432 
00433   int nCodeWords, nDim;
00434 
00435   if (fscanf(fp, "%d %d", &nCodeWords, &nDim) != 2)
00436     LFATAL("Invalid codeword file beginning.");
00437 
00438   itsCodeWords.clear();
00439   itsCodeWords.resize(nCodeWords);
00440   for (int i = 0; i < nCodeWords; i++) {
00441 
00442     std::vector<double> codeWord(nDim);
00443 
00444     for (int j = 0; j < nDim; j++) {
00445       float val;
00446       if (fscanf(fp, "%f", &val) != 1)
00447         LFATAL("Invalid code word.");
00448       codeWord[j] = val;
00449     }
00450     itsCodeWords[i] = codeWord;
00451   }
00452 
00453   fclose(fp);
00454 }
00455 
00456 
00457 void ObjRecBOF::getCodeWords(int numCodeWords)
00458 {
00459 
00460   initRandomNumbers();
00461 
00462   int numOfKeypoints = 0;
00463   int nDims = 0;
00464   //find the number of data and the dimentions
00465   for(uint i=0; i<itsObjects.size(); i++)
00466   {
00467     numOfKeypoints += itsObjects[i].keypoints.size();
00468     if (itsObjects[i].keypoints.size() > 0)
00469       nDims = itsObjects[i].keypoints[0].fv.size();
00470   }
00471   ASSERT(numOfKeypoints != 0 && nDims != 0);
00472 
00473 
00474   std::vector<std::vector<double> > centers(numCodeWords, std::vector<double>(nDims));
00475   std::vector<std::vector<double> > oldCenters(numCodeWords, std::vector<double>(nDims));
00476   std::vector<int> counters(numCodeWords);
00477 
00478   std::vector<int> labels(numOfKeypoints);
00479 
00480   //initalize labels randomly
00481   for(uint i=0; i<labels.size(); i++)
00482     labels[i] = randomUpToNotIncluding(numCodeWords);
00483 
00484   //iterate to find clusters centers
00485   int maxIter = 10;
00486   double epsilon = 1.0;
00487   double max_dist = epsilon*2;
00488 
00489   for(int iter = 0; iter < maxIter; iter++)
00490   {
00491     //zero centers
00492     for(uint j=0; j<centers.size(); j++)
00493       for(uint i=0; i<centers[j].size(); i++)
00494          centers[j][i] = 0;
00495 
00496     //zero counters
00497     for(uint i=0; i<counters.size(); i++)
00498       counters[i] = 0;
00499 
00500     //compute centers from each sample
00501     uint idx = 0;
00502     for(uint obj=0; obj<itsObjects.size(); obj++)
00503     {
00504       uint numKp = itsObjects[obj].keypoints.size();
00505       for (uint kp=0; kp<numKp; kp++)
00506       {
00507         std::vector<double>::iterator s = itsObjects[obj].keypoints[kp].fv.begin(); //the data vector
00508         uint k = labels[idx];  //the label assigned to this vector
00509         std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label
00510 
00511         int j;
00512         for(j=0; j<= (int)centers[k].size() - 4; j += 4)
00513         {
00514           double t0 = c[j] + s[j];
00515           double t1 = c[j+1] + s[j+1];
00516 
00517           c[j] = t0;
00518           c[j+1] = t1;
00519 
00520           t0 = c[j+2] + s[j+2];
00521           t1 = c[j+3] + s[j+3];
00522 
00523           c[j+2] = t0;
00524           c[j+3] = t1;
00525         }
00526         for( ; j < (int)centers[k].size(); j++ )
00527           c[j] += s[j];
00528 
00529         counters[k]++; //increment the number of datapoints in this class
00530         idx++;
00531       }
00532     }
00533 
00534     if (iter > 0)
00535       max_dist = 0;
00536 
00537     for(uint k=0; k<centers.size(); k++)
00538     {
00539       std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label
00540       if (counters[k] != 0)  //if we have some members in the class
00541       {
00542         //scale the mean vector with the number of its members
00543         double scale = 1./counters[k];
00544         for(uint j = 0; j < centers[k].size(); j++ )
00545           c[j] *= scale;
00546       }
00547       else
00548       {
00549         //random initalization
00550         //pick a keypoint at random from any object
00551         int obj = randomUpToNotIncluding(itsObjects.size());
00552         int kp = randomUpToNotIncluding(itsObjects[obj].keypoints.size());
00553 
00554         //assign the cluster center from this keypoint
00555         for(uint j=0; j<centers[k].size(); j++)
00556           c[j] = itsObjects[obj].keypoints[kp].fv[j];
00557       }
00558 
00559       if (iter > 0)
00560       {
00561         //find the distance betwwen the old center hypothisis and the new one
00562         double dist = 0;
00563         std::vector<double>::iterator c_o = oldCenters[k].begin();
00564         for(uint j = 0; j < centers[k].size(); j++ )
00565         {
00566           double t = c[j] - c_o[j];
00567           dist += t*t;
00568         }
00569         if( max_dist < dist )
00570           max_dist = dist;
00571       }
00572     }
00573 
00574     //assign the labels to the new clusters
00575     idx = 0;
00576     for(uint obj=0; obj<itsObjects.size(); obj++)
00577     {
00578       uint numKp = itsObjects[obj].keypoints.size();
00579       for (uint kp=0; kp<numKp; kp++)
00580       {
00581         std::vector<double>::iterator s = itsObjects[obj].keypoints[kp].fv.begin(); //the data vector
00582         int k_best = 0;
00583         double min_dist = std::numeric_limits<double>::max();
00584 
00585         //find the center with the lowest distance to this data vector
00586         for(uint k=0; k<centers.size(); k++)
00587         {
00588           std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label
00589           double dist = 0;
00590 
00591           int j = 0;
00592           for(; j<= (int)centers[k].size() - 4; j += 4)
00593           {
00594             double t0 = c[j] - s[j];
00595             double t1 = c[j+1] - s[j+1];
00596             dist += t0*t0 + t1*t1;
00597 
00598             t0 = c[j+2] - s[j+2];
00599             t1 = c[j+3] - s[j+3];
00600             dist += t0*t0 + t1*t1;
00601           }
00602 
00603           for( ; j < (int)centers[k].size(); j++ )
00604           {
00605             double t = c[j] - s[j];
00606             dist += t*t;
00607           }
00608 
00609           if (min_dist > dist)
00610           {
00611             min_dist = dist;
00612             k_best = k;
00613           }
00614         }
00615         labels[idx] = k_best;
00616         idx++;
00617       }
00618     }
00619 
00620     if (max_dist < epsilon)
00621       break;
00622 
00623     //if we did not terminate, then set the swap old centers to the new centers
00624     for(uint j=0; j<centers.size(); j++)
00625     {
00626       for(uint i=0; i<centers[j].size(); i++)
00627       {
00628         double tmp = centers[j][i];
00629         centers[j][i] = oldCenters[j][i];
00630         oldCenters[j][i] = tmp;
00631       }
00632     }
00633 
00634   }
00635 
00636   //ensure that we do not have empty clusters
00637   for(uint i=0; i<counters.size(); i++)
00638     counters[i] = 0;
00639 
00640   uint idx = 0;
00641   for(uint obj=0; obj<itsObjects.size(); obj++)
00642   {
00643     uint numKp = itsObjects[obj].keypoints.size();
00644     for (uint kp=0; kp<numKp; kp++)
00645     {
00646       counters[labels[idx]]++;
00647       idx++;
00648     }
00649   }
00650 
00651 
00652   //for(uint i=0; i<labels.size(); i++)
00653   //  printf("%i %i\n", i, labels[i]);
00654 
00655   //for(uint i=0; i<counters.size(); i++)
00656   //{
00657   //  if (counters[i] == 0)
00658   //    LINFO("Counter is zero");
00659   //}
00660 
00661   itsCodeWords = centers;
00662 
00663 }
00664 
00665 
00666 /*
00667 void ObjRecBOF::getCodeWords(int numCodeWords)
00668 {
00669   int numOfKeypoints = 0;
00670   int nDims = 0;
00671 
00672   //find the number of data and the dimentions
00673   for(uint i=0; i<itsObjects.size(); i++)
00674   {
00675     numOfKeypoints += itsObjects[i].keypoints.size();
00676     if (itsObjects[i].keypoints.size() > 0)
00677       nDims = itsObjects[i].keypoints[0].fv.size();
00678   }
00679 
00680 
00681   //insert values into the data tables
00682   CvMat *data = cvCreateMat(numOfKeypoints, nDims, CV_32FC1);
00683   CvMat *labels = cvCreateMat(numOfKeypoints, 1, CV_32SC1);
00684 
00685   cvZero(labels);
00686   int idx=0;
00687   for(uint obj=0; obj<itsObjects.size(); obj++)
00688   {
00689     for (uint i=0; i<itsObjects[obj].keypoints.size(); i++)
00690     {
00691       std::vector<double> fv = itsObjects[obj].keypoints[i].fv;
00692       for (uint j=0; j<fv.size(); j++)
00693       {
00694         cvmSet(data, idx, j, fv[j]);
00695         //cvSetReal2D(data, idx, j, fv[j]);
00696       }
00697       idx++;
00698     }
00699   }
00700 
00701   //run kmeans
00702   cvKMeans2(data, numCodeWords, labels,
00703       cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ));
00704 
00705   for(int i=0; i<numOfKeypoints; i++)
00706   {
00707     printf("%i %i\n", i, labels->data.i[i]);
00708   }
00709   //get the clusters means
00710   CvMat *clusterMeans = cvCreateMat(numCodeWords, nDims, CV_32FC1);
00711   CvMat *nDataCount = cvCreateMat(numCodeWords, 1, CV_32FC1);
00712   cvZero(clusterMeans);
00713   cvZero(nDataCount);
00714 
00715   for(int i=0; i<numOfKeypoints; i++)
00716   {
00717     for(int j=0; j<nDims; j++)
00718     {
00719       double val = cvGetReal2D(clusterMeans, labels->data.i[i], j);
00720       double val2 = cvGetReal2D(data, i, j);
00721       cvSetReal2D(clusterMeans, labels->data.i[i], j, val+val2);
00722 
00723       // printf("%0.1f %0.1f %0.1f\n", val, val2, count);
00724 
00725     }
00726     double count = cvmGet(nDataCount, labels->data.i[i], 0);
00727     cvmSet(nDataCount, labels->data.i[i], 0, count+1);
00728   }
00729 
00730   //inset the cluster means into the codeWords
00731   itsCodeWords.resize(numCodeWords);
00732   for(int i=0; i<numCodeWords; i++)
00733   {
00734     double count = cvmGet(nDataCount, i, 0);
00735     std::vector<double> codes;
00736     codes.resize(nDims);
00737     for(int j=0; j<nDims; j++)
00738     {
00739       double sum = cvmGet(clusterMeans, i, j);
00740       //cvmSet(clusterMeans, i, j, sum/count);
00741       codes[j] = sum/count;
00742     }
00743     itsCodeWords[i] = codes;
00744   }
00745 
00746 
00747   cvReleaseMat(&data);
00748   cvReleaseMat(&labels);
00749   cvReleaseMat(&clusterMeans);
00750   cvReleaseMat(&nDataCount);
00751 }
00752 */
00753 
00754 void ObjRecBOF::readSaliencyFeatures(const char* dirname)
00755 {
00756   DIR *dp = opendir(dirname);
00757   if (dp == NULL)
00758     LFATAL("Can not open %s", dirname);
00759   dirent *dirp;
00760   while ((dirp = readdir(dp)) != NULL ) {
00761     std::string dirName(dirp->d_name);
00762     if (dirName.find(".dat") != std::string::npos)
00763     {
00764       char filename[255];
00765       snprintf(filename, sizeof(filename), "%s/%s", dirname, dirp->d_name);
00766       Object obj;
00767       obj.name = std::string(dirp->d_name);
00768       obj.keypoints = readSaliencyKeypoints(filename); //read the keypoints
00769       itsObjects.push_back(obj);
00770     }
00771   }
00772   closedir(dp);
00773 }
00774 
00775 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readSaliencyKeypoints(const char *filename)
00776 {
00777 
00778   std::vector<Keypoint> keypoints;
00779   LINFO("Reading %s", filename);
00780   FILE *fp;
00781   fp = fopen(filename, "r");
00782   if (!fp)
00783   {
00784     LINFO("Error reading %s", filename);
00785     return keypoints;
00786   }
00787 
00788   int len = 42;
00789   while (fp != NULL)
00790   {
00791     Keypoint key;
00792     int keyNum, x, y;
00793     if (fscanf(fp, "%i %i %i", &keyNum, &x, &y) != 3) break;
00794     key.x = (double)x;
00795     key.y = (double)y;
00796     key.codeWord = -1;
00797 
00798     for (int j = 0; j < len; j++) {
00799       float val = -1;
00800       if (fscanf(fp, "%f", &val) != 1)
00801         perror("Invalid keypoint file value.");
00802       key.fv.push_back(val);
00803     }
00804     keypoints.push_back(key);
00805   }
00806 
00807   fclose(fp);
00808   return keypoints;
00809 }
00810 
00811 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readSIFTKeypoints(const char *filename)
00812 {
00813 
00814   std::vector<Keypoint> keypoints;
00815   printf("Reading %s\n", filename);
00816   FILE *fp;
00817   fp = fopen(filename, "r");
00818   if (!fp)
00819   {
00820     LINFO("Error reading %s", filename);
00821     return keypoints;
00822   }
00823 
00824   int i, j, num, len, val;
00825 
00826   if (fscanf(fp, "%d %d", &num, &len) != 2)
00827     perror("Invalid keypoint file beginning.");
00828 
00829   if (len != 128)
00830     perror("Keypoint descriptor length invalid (should be 128).");
00831   for (i = 0; i < num; i++) {
00832 
00833     Keypoint key;
00834     float x, y, scale, ori;
00835     if (fscanf(fp, "%f %f %f %f", &x, &y, &scale, &ori) != 4) break;
00836     key.x = x;
00837     key.y = y;
00838     key.scale = scale;
00839     key.ori = ori;
00840 
00841     for (j = 0; j < len; j++) {
00842       if (fscanf(fp, "%d", &val) != 1 || val < 0 || val > 255)
00843         perror("Invalid keypoint file value.");
00844       key.fv.push_back((double)val);
00845     }
00846     keypoints.push_back(key);
00847   }
00848 
00849   fclose(fp);
00850 
00851   return keypoints;
00852 }
00853 
00854 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readIlabSIFTKeypoints(const char *filename)
00855 {
00856 
00857   std::vector<Keypoint> keypoints;
00858   printf("Reading %s\n", filename);
00859   FILE *fp;
00860   fp = fopen(filename, "r");
00861   if (!fp)
00862   {
00863     LINFO("Error reading %s", filename);
00864     return keypoints;
00865   }
00866 
00867   int len = 128;
00868 
00869   while (fp != NULL)
00870   {
00871     Keypoint key;
00872     int x, y;
00873     if (fscanf(fp, "%i %i", &x, &y) != 2) break;
00874     key.x = x;
00875     key.y = y;
00876 
00877     for (int j = 0; j < len; j++) {
00878       int val;
00879       if (fscanf(fp, "%d", &val) != 1 || val < 0 || val > 255)
00880         perror("Invalid keypoint file value.");
00881       key.fv.push_back((double)val);
00882     }
00883     keypoints.push_back(key);
00884   }
00885 
00886   fclose(fp);
00887 
00888   return keypoints;
00889 }
00890 
00891 std::vector<std::vector<byte> >  ObjRecBOF::getSiftDescriptor(const Image<float> &lum,
00892     const float x, const float y, const float s)
00893 {
00894 
00895   Image<float> mag, ori;
00896 
00897   //gradientSobel(lum, mag, ori, 3);
00898   gradient(lum, mag, ori);
00899 
00900   Histogram OV(itsNumOriArray);
00901 
00902   // 1. Calculate main orientation vector
00903   calculateOrientationVector(x, y, s, mag, ori, OV);
00904 
00905   //Image<byte> histImg = OV.getHistogramImage(256, 256, 0, -1);
00906 
00907   // 2. Create feature vector and keypoint for each significant
00908   // orientation peak:
00909   return createVectorsAndKeypoints(x, y, s, mag, ori, OV);
00910 
00911 }
00912 
00913 void ObjRecBOF::calculateOrientationVector(const float x, const float y, const float s,
00914                 const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV) {
00915 
00916 
00917         // compute the effective blurring sigma corresponding to the
00918         // fractional scale s:
00919         const float sigma = s;
00920 
00921         const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig);
00922         const int dimX = gradmag.getWidth(), dimY = gradmag.getHeight();
00923 
00924         const int xi = int(x + 0.5f);
00925         const int yi = int(y + 0.5f);
00926 
00927         const int rad = int(3.0F * sig);
00928         const int rad2 = rad * rad;
00929 
00930 
00931         // search bounds:
00932         int starty = yi - rad; if (starty < 0) starty = 0;
00933         int stopy = yi + rad; if (stopy >= dimY) stopy = dimY-1;
00934 
00935         // 1. Calculate orientation vector
00936         for (int ind_y = starty; ind_y <= stopy; ind_y ++)
00937         {
00938                 // given that y, get the corresponding range of x values that
00939                 // lie within the disk (and remain within the image):
00940                 const int yoff = ind_y - yi;
00941                 const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F);
00942                 int startx = xi - bound; if (startx < 0) startx = 0;
00943                 int stopx = xi + bound; if (stopx >= dimX) stopx = dimX-1;
00944 
00945                 for (int ind_x = startx; ind_x <= stopx; ind_x ++)
00946                 {
00947                         const float dx = float(ind_x) - x, dy = float(ind_y) - y;
00948                         const float distSq = dx * dx + dy * dy;
00949 
00950                         // get gradient:
00951                         const float gradVal = gradmag.getVal(ind_x, ind_y);
00952 
00953                         // compute the gaussian weight for this histogram entry:
00954                         const float gaussianWeight = expf(distSq * inv2sig2);
00955 
00956                         // add this orientation to the histogram
00957                         // [-pi ; pi] -> [0 ; 2pi]
00958                         float angle = gradorie.getVal(ind_x, ind_y) + M_PI;
00959 
00960                         // [0 ; 2pi] -> [0 ; 36]
00961                         angle = 0.5F * angle * itsNumOriArray / M_PI;
00962                         while (angle < 0.0F) angle += itsNumOriArray;
00963                         while (angle >= itsNumOriArray) angle -= itsNumOriArray;
00964 
00965                         OV.addValueInterp(angle, gaussianWeight * gradVal);
00966                 }
00967         }
00968 
00969 
00970         // smooth the orientation histogram 3 times:
00971         for (int i = 0; i < 3; i++) OV.smooth();
00972 }
00973 
00974 // ######################################################################
00975 
00976 
00977 std::vector<std::vector<byte> > ObjRecBOF::createVectorsAndKeypoints(const float x,
00978     const float y, const float s,
00979     const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
00980 {
00981 
00982   const float sigma = s; //itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3));
00983 
00984   // find the max in the histogram:
00985   float maxPeakValue = OV.findMax();
00986 
00987   const int xi = int(x + 0.5f);
00988   const int yi = int(y + 0.5f);
00989 
00990   uint numkp = 0;
00991 
00992   std::vector<std::vector<byte> > descriptor;
00993 
00994   // 2. Create feature vector and keypoint for each significant
00995   // orientation peak:
00996   for (int bin = 0; bin < itsNumOriArray; bin++)
00997   {
00998     // consider the peak centered around 'bin':
00999     const float midval = OV.getValue(bin);
01000 
01001     // if current value much smaller than global peak or = to zero, forget it:
01002     if (midval == 0 || midval < 0.8F * maxPeakValue) continue;
01003 
01004     // get value to the left of current value
01005     const float leftval = OV.getValue((bin == 0) ? itsNumOriArray-1 : bin-1);
01006 
01007     // get value to the right of current value
01008     const float rightval = OV.getValue((bin == itsNumOriArray-1) ? 0 : bin+1);
01009 
01010     // only consider local peaks:
01011     if (leftval > midval) continue;
01012     if (rightval > midval) continue;
01013 
01014     // interpolate the values to get the orientation of the peak:
01015     //  with f(x) = ax^2 + bx + c
01016     //   f(-1) = x0 = leftval
01017     //   f( 0) = x1 = midval
01018     //   f(+1) = x2 = rightval
01019     //  => a = (x0+x2)/2 - x1
01020     //     b = (x2-x0)/2
01021     //     c = x1
01022     // f'(x) = 0 => x = -b/2a
01023     const float a  = 0.5f * (leftval + rightval) - midval;
01024     const float b  = 0.5f * (rightval - leftval);
01025     float realangle = float(bin) - 0.5F * b / a;
01026 
01027     realangle *= 2.0F * M_PI / itsNumOriArray; // [0:36] to [0:2pi]
01028     realangle -= M_PI;                      // [0:2pi] to [-pi:pi]
01029 
01030     // ############ Create keypoint:
01031 
01032     // compute the feature vector:
01033     FeatureVector fv;
01034 
01035     const float sinAngle = sin(realangle), cosAngle = cos(realangle);
01036 
01037     // check this scale
01038     const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8?
01039     const float gausssig = float(radius); // 1/2 width of descript window
01040     const float gaussfac = - 0.5F / (gausssig * gausssig);
01041 
01042 
01043     // Scan a window of diameter 2*radius+1 around the point of
01044     // interest, and we will cumulate local samples into a 4x4 grid
01045     // of bins, with interpolation. NOTE: rx and ry loop over a
01046     // square that is assumed centered around the point of interest
01047     // and rotated to the gradient orientation (realangle):
01048 
01049     int scale = abs(int(s));
01050     scale = scale > 5 ? 5 : scale;
01051 
01052     for (int ry = -radius; ry <= radius; ry++)
01053       for (int rx = -radius; rx <= radius; rx++)
01054       {
01055         // rotate the point:
01056         const float newX = rx * cosAngle - ry * sinAngle;
01057         const float newY = rx * sinAngle + ry * cosAngle;
01058 
01059         // get the coords in the image frame of reference:
01060         const float orgX = newX + float(xi);
01061         const float orgY = newY + float(yi);
01062 
01063         // if outside the image, forget it:
01064         if (gradmag.coordsOk(orgX, orgY) == false) continue;
01065 
01066         // find the fractional coords of the corresponding bin
01067         // (we subdivide our window into a 4x4 grid of bins):
01068         const float xf = 2.0F + 2.0F * float(rx) / float(radius);
01069         const float yf = 2.0F + 2.0F * float(ry) / float(radius);
01070 
01071 
01072         // find the Gaussian weight from distance to center and
01073         // get weighted gradient magnitude:
01074         const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac);
01075         const float weightedMagnitude =
01076           gaussFactor * gradmag.getValInterp(orgX, orgY);
01077 
01078         // get the gradient orientation relative to the keypoint
01079         // orientation and scale it for 8 orientation bins:
01080         float gradAng = gradorie.getValInterp(orgX, orgY) - realangle;
01081 
01082         gradAng=fmod(gradAng, 2*M_PI); //bring the range from 0 to M_PI
01083 
01084         //convert from -M_PI to M_PI
01085         if (gradAng < 0.0) gradAng+=2*M_PI; //convert to -M_PI to M_PI
01086         if (gradAng >= M_PI) gradAng-=2*M_PI;
01087         //split to eight bins
01088         const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);
01089 
01090         /*
01091         //reflect the angle to convert from 0 to M_PI
01092         if (gradAng >= M_PI) gradAng-=M_PI;
01093         //split to four bins
01094         const float orient = (gradAng + M_PI) * 4 / (2 * M_PI);
01095          */
01096 
01097         // will be interpolated into 2 x 2 x 2 bins:
01098 
01099         fv.addValue(xf, yf, orient, weightedMagnitude);
01100 
01101       }
01102 
01103     // normalize, clamp, scale and convert to byte:
01104     std::vector<byte> oriVec;
01105 
01106     fv.toByteKey(oriVec);
01107 
01108     double mag = fv.getMag();
01109     if (oriVec.size() > 0 && mag > 0)
01110       descriptor.push_back(oriVec);
01111     //The key point
01112 
01113     ++ numkp;
01114 
01115   }
01116   return descriptor;
01117 }
01118 
01119 
01120 std::string ObjRecBOF::test(const Image<PixRGB<byte> > &img)
01121 {
01122 
01123 
01124   return std::string("Test");
01125 }
01126 
01127 
01128 // ######################################################################
01129 /* So things look consistent in everyone's emacs... */
01130 /* Local Variables: */
01131 /* indent-tabs-mode: nil */
01132 /* End: */
Generated on Sun May 8 08:41:08 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3