00001 /*!@file ObjRec/ObjRecBOF.C ObjRec using bag of features */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00005 // University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/ObjRec/ObjRecBOF.C $ 00035 // $Id: ObjRecBOF.C 13716 2010-07-28 22:07:03Z itti $ 00036 // 00037 00038 #include "ObjRec/ObjRecBOF.H" 00039 #include "Component/OptionManager.H" 00040 #include "Image/ColorOps.H" 00041 #include "GUI/DebugWin.H" 00042 #include "Image/DrawOps.H" 00043 #include "Image/FilterOps.H" 00044 #include "Image/ImageSet.H" 00045 #include "Image/Kernels.H" 00046 #include "Image/MathOps.H" 00047 #include <dirent.h> 00048 00049 00050 00051 // ###################################################################### 00052 ObjRecBOF::ObjRecBOF(OptionManager& mgr, const std::string& descrName, 00053 const std::string& tagName) : 00054 ModelComponent(mgr, descrName, tagName), 00055 itsNumOriArray(36) 00056 { 00057 00058 } 00059 00060 void ObjRecBOF::start2() 00061 { 00062 00063 00064 } 00065 00066 ObjRecBOF::~ObjRecBOF() 00067 { 00068 } 00069 00070 00071 void ObjRecBOF::train(const Image<PixRGB<byte> > &img, const std::string label) 00072 { 00073 00074 } 00075 00076 void ObjRecBOF::train(const std::string &name, int cls) 00077 { 00078 00079 //getSaliencyKeypoints(name.c_str()); 00080 //getSIFTKeypoints(name.c_str()); 00081 getIlabSIFTKeypoints(name.c_str()); 00082 00083 } 00084 00085 void ObjRecBOF::getSaliencyKeypoints(const std::string &name) 00086 { 00087 00088 char filename[255]; 00089 const char* dirname = "/lab/tmpib/u/objRec/bof/salBayes/fv"; 00090 snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str()); 00091 Object obj; 00092 obj.name = name; 00093 obj.keypoints = readSaliencyKeypoints(filename); //read the keypoints 00094 if (obj.keypoints.size() > 0) 00095 itsObjects.push_back(obj); 00096 } 00097 00098 void ObjRecBOF::getSIFTKeypoints(const std::string &name) 00099 { 00100 00101 char filename[255]; 00102 const char* dirname = "/lab/tmpib/u/objRec/bof/siftDemoV4/voc/keyp"; 00103 snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str()); 00104 Object obj; 00105 obj.name = name; 00106 obj.keypoints = readSIFTKeypoints(filename); //read the keypoints 00107 if (obj.keypoints.size() > 0) 00108 itsObjects.push_back(obj); 00109 } 00110 00111 void ObjRecBOF::getIlabSIFTKeypoints(const std::string &name) 00112 { 00113 00114 char filename[255]; 00115 const char* dirname = "/home/elazary/images/VOCdevkit/VOC2007/ilabSift"; 00116 snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str()); 00117 Object obj; 00118 obj.name = name; 00119 obj.keypoints = readIlabSIFTKeypoints(filename); //read the keypoints 00120 if (obj.keypoints.size() > 0) 00121 itsObjects.push_back(obj); 00122 } 00123 00124 void ObjRecBOF::finalizeTraining() 00125 { 00126 getCodeWords(200); 00127 printCodeWords(); 00128 00129 } 00130 00131 void ObjRecBOF::finalizeTesting() 00132 { 00133 assignCodeWords(); 00134 printAssignedCodeWords(); 00135 } 00136 00137 void ObjRecBOF::getObjCodeWords(const std::string &name) 00138 { 00139 char filename[255]; 00140 const char* dirname = "/home/elazary/images/VOCdevkit/VOC2007/ilabSift"; 00141 snprintf(filename, sizeof(filename), "%s/%s.jpg.dat", dirname, name.c_str()); 00142 Object obj; 00143 obj.name = name; 00144 obj.keypoints = readIlabSIFTKeypoints(filename); //read the keypoints 00145 if (obj.keypoints.size() > 0) 00146 { 00147 for(uint kp=0; kp<obj.keypoints.size(); kp++) 00148 { 00149 int codeWord = assignCodeWord(obj.keypoints[kp].fv); 00150 obj.keypoints[kp].codeWord = codeWord; 00151 } 00152 } 00153 00154 snprintf(filename, sizeof(filename), "%s.dat", obj.name.c_str()); 00155 FILE* fp = fopen(filename, "w"); 00156 if (!fp) 00157 LFATAL("Error writing %s", obj.name.c_str()); 00158 00159 for(uint kp=0; kp<obj.keypoints.size(); kp++) 00160 { 00161 fprintf(fp, "%f %f %i ", 00162 obj.keypoints[kp].x, 00163 obj.keypoints[kp].y, 00164 obj.keypoints[kp].codeWord); 00165 //for(uint i=0; i<itsObjects[obj].keypoints[kp].fv.size(); i++) 00166 // fprintf(fp, "%f ", itsObjects[obj].keypoints[kp].fv[i]); 00167 fprintf(fp, "\n"); 00168 } 00169 fclose(fp); 00170 00171 } 00172 00173 Image<float> ObjRecBOF::extractFeatures(const Image<PixRGB<byte> > &img) 00174 { 00175 extractSIFTFeatures(img); 00176 //extractGaborFeatures(img); 00177 00178 return Image<float>(); 00179 } 00180 00181 void ObjRecBOF::extractGaborFeatures(const Image<PixRGB<byte> > &img) 00182 { 00183 00184 int itsNumOri = 8; 00185 int itsNumScales = 2; 00186 00187 float stdmin = 1.75F; 00188 float stdstep = 0.5F; 00189 int fsmin = 3; 00190 int fsstep = 1; 00191 00192 ImageSet<float> itsFilters; 00193 //create the filters 00194 for(int scale = 0; scale < itsNumScales; scale++) 00195 for(int ori = 0; ori < itsNumOri; ori++) 00196 { 00197 00198 Image<float> filter = dogFilter<float>(stdmin + stdstep * scale, 00199 (float)ori * 180.0F / (float)itsNumOri, 00200 fsmin + fsstep * scale); 00201 00202 // fsmin + fsstep * scale); 00203 // normalize to zero mean: 00204 filter -= mean(filter); 00205 00206 // normalize to unit sum-of-squares: 00207 // filter /= sum(squared(filter)); 00208 00209 itsFilters.push_back(filter); 00210 } 00211 00212 00213 double normSum = 0; //the numalization constant 00214 Image<float> input = luminance(img); //convert to gray 00215 //Compute the various low level features 00216 ImageSet<float> featuresValues(itsFilters.size()); 00217 for(uint i=0; i<itsFilters.size(); i++) 00218 { 00219 Image<float> tmp = convolve(input, itsFilters[i], CONV_BOUNDARY_CLEAN); //No normalization 00220 //Image<float> tmp = convolveHmax(input, itsFilters[i]); // normalize by image energy 00221 tmp = abs(tmp); 00222 normSum += sum(tmp); 00223 featuresValues[i] = tmp; 00224 } 00225 00226 00227 for(uint feature=0; feature<featuresValues.size(); feature++) //for each feature m 00228 { 00229 Image<float> featureVal = featuresValues[feature]; 00230 00231 std::vector<Histogram> levelHists; 00232 for(int level = 0; level < 4; level++) 00233 { 00234 int levelSize = 1<<level; 00235 Histogram hist(levelSize*levelSize); 00236 00237 int xSpace = (featureVal.getWidth()/levelSize) + 1; 00238 int ySpace = (featureVal.getHeight()/levelSize) + 1; 00239 00240 for(int y=0; y<featureVal.getHeight(); y++) 00241 for(int x=0; x<featureVal.getWidth(); x++) 00242 { 00243 int binPos = (int)(x/xSpace + levelSize*(y/ySpace)); 00244 hist.addValue(binPos, featureVal.getVal(x,y)); 00245 } 00246 hist.normalize(normSum); //normalize across the sum of all feature outputs 00247 levelHists.push_back(hist); 00248 } 00249 00250 //print the histograms 00251 printf("%i %i ",feature, (int)levelHists.size()); 00252 for(uint h=0; h<levelHists.size(); h++) 00253 { 00254 Histogram hist = levelHists[h]; 00255 for(uint i=0; i<hist.size(); i++) 00256 printf("%f ", hist[i]); 00257 } 00258 printf("\n"); 00259 00260 } 00261 00262 00263 00264 } 00265 00266 void ObjRecBOF::extractSIFTFeatures(const Image<PixRGB<byte> > &img) 00267 { 00268 00269 Image<float> input = luminance(img); //convert to gray 00270 LINFO("Get sift vector"); 00271 for(int y=10; y<input.getHeight()-10; y+=10) 00272 for(int x=10; x<input.getWidth()-10; x+=10) 00273 { 00274 std::vector<std::vector<byte> > sd = getSiftDescriptor(input, x,y,2); 00275 for(uint j=0; j<sd.size(); j++) 00276 { 00277 printf("%i %i ", x, y); 00278 for(uint i=0; i<sd[j].size(); i++) 00279 { 00280 printf("%i ", sd[j][i]); 00281 } 00282 printf("\n"); 00283 } 00284 } 00285 00286 } 00287 00288 00289 void ObjRecBOF::extractCodeWords(const char* dirname) 00290 { 00291 00292 Image<float> debugImg(256,256, ZEROS); 00293 readSaliencyFeatures(dirname); 00294 00295 // 00296 for(uint obj=0; obj<itsObjects.size(); obj++) 00297 for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++) 00298 { 00299 float x = itsObjects[obj].keypoints[kp].fv[0]; 00300 float y = itsObjects[obj].keypoints[kp].fv[1]; 00301 //float val = debugImg.getVal((int)x,(int)y); 00302 debugImg.setVal((int)x,(int)y,128.0); 00303 } 00304 00305 //printFeatures(); 00306 getCodeWords(200); 00307 //readCodeWords("/lab/tmpib/u/objRec/salBayes/fv/codeWords.dat"); 00308 //printCodeWords(); 00309 for(uint j=0; j<itsCodeWords.size(); j++) 00310 { 00311 float x = itsCodeWords[j][0]; 00312 float y = itsCodeWords[j][1]; 00313 //float val = debugImg.getVal((int)x,(int)y); 00314 drawCircle(debugImg, Point2D<int>((int)x,(int)y), 3, 255.0F); 00315 } 00316 SHOWIMG(debugImg); 00317 00318 //assignCodeWords(); 00319 //printAssignedCodeWords(); 00320 00321 } 00322 00323 void ObjRecBOF::printFeatures() 00324 { 00325 00326 for (uint i=0; i<itsObjects[0].keypoints.size(); i++) 00327 { 00328 printf("%i %f %f ", i, 00329 itsObjects[0].keypoints[i].x, 00330 itsObjects[0].keypoints[i].y); 00331 00332 std::vector<double> fv = itsObjects[0].keypoints[i].fv; 00333 for (uint j=0; j<fv.size(); j++) 00334 printf("%f ", fv[j]); 00335 printf("\n"); 00336 } 00337 00338 } 00339 00340 void ObjRecBOF::printCodeWords() 00341 { 00342 00343 for(uint j=0; j<itsCodeWords.size(); j++) 00344 { 00345 for(uint i=0; i<itsCodeWords[j].size(); i++) 00346 printf("%f ", itsCodeWords[j][i]); 00347 printf("\n"); 00348 } 00349 } 00350 00351 void ObjRecBOF::printAssignedCodeWords() 00352 { 00353 for(uint obj=0; obj<itsObjects.size(); obj++) 00354 { 00355 char filename[255]; 00356 snprintf(filename, sizeof(filename), "%s.dat", itsObjects[obj].name.c_str()); 00357 FILE* fp = fopen(filename, "w"); 00358 if (!fp) 00359 LFATAL("Error writing %s", itsObjects[obj].name.c_str()); 00360 00361 for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++) 00362 { 00363 fprintf(fp, "%f %f %i ", 00364 itsObjects[obj].keypoints[kp].x, 00365 itsObjects[obj].keypoints[kp].y, 00366 itsObjects[obj].keypoints[kp].codeWord); 00367 //for(uint i=0; i<itsObjects[obj].keypoints[kp].fv.size(); i++) 00368 // fprintf(fp, "%f ", itsObjects[obj].keypoints[kp].fv[i]); 00369 fprintf(fp, "\n"); 00370 } 00371 fclose(fp); 00372 } 00373 } 00374 00375 void ObjRecBOF::assignCodeWords() 00376 { 00377 for(uint obj=0; obj<itsObjects.size(); obj++) 00378 for(uint kp=0; kp<itsObjects[obj].keypoints.size(); kp++) 00379 { 00380 int codeWord = assignCodeWord(itsObjects[obj].keypoints[kp].fv); 00381 itsObjects[obj].keypoints[kp].codeWord = codeWord; 00382 } 00383 } 00384 00385 int ObjRecBOF::assignCodeWord(const std::vector<double> &fv) 00386 { 00387 std::vector<double>::const_iterator s = fv.begin(); 00388 int k_best = 0; 00389 double min_dist = std::numeric_limits<double>::max(); 00390 00391 //find the center with the lowest distance to this data vector 00392 for(uint k=0; k<itsCodeWords.size(); k++) 00393 { 00394 std::vector<double>::iterator c = itsCodeWords[k].begin(); //the mean vector for this label 00395 double dist = 0; 00396 00397 uint j = 0; 00398 for(; j<= itsCodeWords[k].size() - 4; j += 4) 00399 { 00400 double t0 = c[j] - s[j]; 00401 double t1 = c[j+1] - s[j+1]; 00402 dist += t0*t0 + t1*t1; 00403 00404 t0 = c[j+2] - s[j+2]; 00405 t1 = c[j+3] - s[j+3]; 00406 dist += t0*t0 + t1*t1; 00407 } 00408 00409 for( ; j < itsCodeWords[k].size(); j++ ) 00410 { 00411 double t = c[j] - s[j]; 00412 dist += t*t; 00413 } 00414 00415 if (min_dist > dist) 00416 { 00417 min_dist = dist; 00418 k_best = k; 00419 } 00420 } 00421 00422 return k_best; 00423 } 00424 00425 void ObjRecBOF::readCodeWords(const char* filename) 00426 { 00427 LINFO("Reading %s", filename); 00428 FILE *fp; 00429 fp = fopen(filename, "r"); 00430 if (!fp) 00431 LFATAL("Error reading %s\n", filename); 00432 00433 int nCodeWords, nDim; 00434 00435 if (fscanf(fp, "%d %d", &nCodeWords, &nDim) != 2) 00436 LFATAL("Invalid codeword file beginning."); 00437 00438 itsCodeWords.clear(); 00439 itsCodeWords.resize(nCodeWords); 00440 for (int i = 0; i < nCodeWords; i++) { 00441 00442 std::vector<double> codeWord(nDim); 00443 00444 for (int j = 0; j < nDim; j++) { 00445 float val; 00446 if (fscanf(fp, "%f", &val) != 1) 00447 LFATAL("Invalid code word."); 00448 codeWord[j] = val; 00449 } 00450 itsCodeWords[i] = codeWord; 00451 } 00452 00453 fclose(fp); 00454 } 00455 00456 00457 void ObjRecBOF::getCodeWords(int numCodeWords) 00458 { 00459 00460 initRandomNumbers(); 00461 00462 int numOfKeypoints = 0; 00463 int nDims = 0; 00464 //find the number of data and the dimentions 00465 for(uint i=0; i<itsObjects.size(); i++) 00466 { 00467 numOfKeypoints += itsObjects[i].keypoints.size(); 00468 if (itsObjects[i].keypoints.size() > 0) 00469 nDims = itsObjects[i].keypoints[0].fv.size(); 00470 } 00471 ASSERT(numOfKeypoints != 0 && nDims != 0); 00472 00473 00474 std::vector<std::vector<double> > centers(numCodeWords, std::vector<double>(nDims)); 00475 std::vector<std::vector<double> > oldCenters(numCodeWords, std::vector<double>(nDims)); 00476 std::vector<int> counters(numCodeWords); 00477 00478 std::vector<int> labels(numOfKeypoints); 00479 00480 //initalize labels randomly 00481 for(uint i=0; i<labels.size(); i++) 00482 labels[i] = randomUpToNotIncluding(numCodeWords); 00483 00484 //iterate to find clusters centers 00485 int maxIter = 10; 00486 double epsilon = 1.0; 00487 double max_dist = epsilon*2; 00488 00489 for(int iter = 0; iter < maxIter; iter++) 00490 { 00491 //zero centers 00492 for(uint j=0; j<centers.size(); j++) 00493 for(uint i=0; i<centers[j].size(); i++) 00494 centers[j][i] = 0; 00495 00496 //zero counters 00497 for(uint i=0; i<counters.size(); i++) 00498 counters[i] = 0; 00499 00500 //compute centers from each sample 00501 uint idx = 0; 00502 for(uint obj=0; obj<itsObjects.size(); obj++) 00503 { 00504 uint numKp = itsObjects[obj].keypoints.size(); 00505 for (uint kp=0; kp<numKp; kp++) 00506 { 00507 std::vector<double>::iterator s = itsObjects[obj].keypoints[kp].fv.begin(); //the data vector 00508 uint k = labels[idx]; //the label assigned to this vector 00509 std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label 00510 00511 int j; 00512 for(j=0; j<= (int)centers[k].size() - 4; j += 4) 00513 { 00514 double t0 = c[j] + s[j]; 00515 double t1 = c[j+1] + s[j+1]; 00516 00517 c[j] = t0; 00518 c[j+1] = t1; 00519 00520 t0 = c[j+2] + s[j+2]; 00521 t1 = c[j+3] + s[j+3]; 00522 00523 c[j+2] = t0; 00524 c[j+3] = t1; 00525 } 00526 for( ; j < (int)centers[k].size(); j++ ) 00527 c[j] += s[j]; 00528 00529 counters[k]++; //increment the number of datapoints in this class 00530 idx++; 00531 } 00532 } 00533 00534 if (iter > 0) 00535 max_dist = 0; 00536 00537 for(uint k=0; k<centers.size(); k++) 00538 { 00539 std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label 00540 if (counters[k] != 0) //if we have some members in the class 00541 { 00542 //scale the mean vector with the number of its members 00543 double scale = 1./counters[k]; 00544 for(uint j = 0; j < centers[k].size(); j++ ) 00545 c[j] *= scale; 00546 } 00547 else 00548 { 00549 //random initalization 00550 //pick a keypoint at random from any object 00551 int obj = randomUpToNotIncluding(itsObjects.size()); 00552 int kp = randomUpToNotIncluding(itsObjects[obj].keypoints.size()); 00553 00554 //assign the cluster center from this keypoint 00555 for(uint j=0; j<centers[k].size(); j++) 00556 c[j] = itsObjects[obj].keypoints[kp].fv[j]; 00557 } 00558 00559 if (iter > 0) 00560 { 00561 //find the distance betwwen the old center hypothisis and the new one 00562 double dist = 0; 00563 std::vector<double>::iterator c_o = oldCenters[k].begin(); 00564 for(uint j = 0; j < centers[k].size(); j++ ) 00565 { 00566 double t = c[j] - c_o[j]; 00567 dist += t*t; 00568 } 00569 if( max_dist < dist ) 00570 max_dist = dist; 00571 } 00572 } 00573 00574 //assign the labels to the new clusters 00575 idx = 0; 00576 for(uint obj=0; obj<itsObjects.size(); obj++) 00577 { 00578 uint numKp = itsObjects[obj].keypoints.size(); 00579 for (uint kp=0; kp<numKp; kp++) 00580 { 00581 std::vector<double>::iterator s = itsObjects[obj].keypoints[kp].fv.begin(); //the data vector 00582 int k_best = 0; 00583 double min_dist = std::numeric_limits<double>::max(); 00584 00585 //find the center with the lowest distance to this data vector 00586 for(uint k=0; k<centers.size(); k++) 00587 { 00588 std::vector<double>::iterator c = centers[k].begin(); //the mean vector for this label 00589 double dist = 0; 00590 00591 int j = 0; 00592 for(; j<= (int)centers[k].size() - 4; j += 4) 00593 { 00594 double t0 = c[j] - s[j]; 00595 double t1 = c[j+1] - s[j+1]; 00596 dist += t0*t0 + t1*t1; 00597 00598 t0 = c[j+2] - s[j+2]; 00599 t1 = c[j+3] - s[j+3]; 00600 dist += t0*t0 + t1*t1; 00601 } 00602 00603 for( ; j < (int)centers[k].size(); j++ ) 00604 { 00605 double t = c[j] - s[j]; 00606 dist += t*t; 00607 } 00608 00609 if (min_dist > dist) 00610 { 00611 min_dist = dist; 00612 k_best = k; 00613 } 00614 } 00615 labels[idx] = k_best; 00616 idx++; 00617 } 00618 } 00619 00620 if (max_dist < epsilon) 00621 break; 00622 00623 //if we did not terminate, then set the swap old centers to the new centers 00624 for(uint j=0; j<centers.size(); j++) 00625 { 00626 for(uint i=0; i<centers[j].size(); i++) 00627 { 00628 double tmp = centers[j][i]; 00629 centers[j][i] = oldCenters[j][i]; 00630 oldCenters[j][i] = tmp; 00631 } 00632 } 00633 00634 } 00635 00636 //ensure that we do not have empty clusters 00637 for(uint i=0; i<counters.size(); i++) 00638 counters[i] = 0; 00639 00640 uint idx = 0; 00641 for(uint obj=0; obj<itsObjects.size(); obj++) 00642 { 00643 uint numKp = itsObjects[obj].keypoints.size(); 00644 for (uint kp=0; kp<numKp; kp++) 00645 { 00646 counters[labels[idx]]++; 00647 idx++; 00648 } 00649 } 00650 00651 00652 //for(uint i=0; i<labels.size(); i++) 00653 // printf("%i %i\n", i, labels[i]); 00654 00655 //for(uint i=0; i<counters.size(); i++) 00656 //{ 00657 // if (counters[i] == 0) 00658 // LINFO("Counter is zero"); 00659 //} 00660 00661 itsCodeWords = centers; 00662 00663 } 00664 00665 00666 /* 00667 void ObjRecBOF::getCodeWords(int numCodeWords) 00668 { 00669 int numOfKeypoints = 0; 00670 int nDims = 0; 00671 00672 //find the number of data and the dimentions 00673 for(uint i=0; i<itsObjects.size(); i++) 00674 { 00675 numOfKeypoints += itsObjects[i].keypoints.size(); 00676 if (itsObjects[i].keypoints.size() > 0) 00677 nDims = itsObjects[i].keypoints[0].fv.size(); 00678 } 00679 00680 00681 //insert values into the data tables 00682 CvMat *data = cvCreateMat(numOfKeypoints, nDims, CV_32FC1); 00683 CvMat *labels = cvCreateMat(numOfKeypoints, 1, CV_32SC1); 00684 00685 cvZero(labels); 00686 int idx=0; 00687 for(uint obj=0; obj<itsObjects.size(); obj++) 00688 { 00689 for (uint i=0; i<itsObjects[obj].keypoints.size(); i++) 00690 { 00691 std::vector<double> fv = itsObjects[obj].keypoints[i].fv; 00692 for (uint j=0; j<fv.size(); j++) 00693 { 00694 cvmSet(data, idx, j, fv[j]); 00695 //cvSetReal2D(data, idx, j, fv[j]); 00696 } 00697 idx++; 00698 } 00699 } 00700 00701 //run kmeans 00702 cvKMeans2(data, numCodeWords, labels, 00703 cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 )); 00704 00705 for(int i=0; i<numOfKeypoints; i++) 00706 { 00707 printf("%i %i\n", i, labels->data.i[i]); 00708 } 00709 //get the clusters means 00710 CvMat *clusterMeans = cvCreateMat(numCodeWords, nDims, CV_32FC1); 00711 CvMat *nDataCount = cvCreateMat(numCodeWords, 1, CV_32FC1); 00712 cvZero(clusterMeans); 00713 cvZero(nDataCount); 00714 00715 for(int i=0; i<numOfKeypoints; i++) 00716 { 00717 for(int j=0; j<nDims; j++) 00718 { 00719 double val = cvGetReal2D(clusterMeans, labels->data.i[i], j); 00720 double val2 = cvGetReal2D(data, i, j); 00721 cvSetReal2D(clusterMeans, labels->data.i[i], j, val+val2); 00722 00723 // printf("%0.1f %0.1f %0.1f\n", val, val2, count); 00724 00725 } 00726 double count = cvmGet(nDataCount, labels->data.i[i], 0); 00727 cvmSet(nDataCount, labels->data.i[i], 0, count+1); 00728 } 00729 00730 //inset the cluster means into the codeWords 00731 itsCodeWords.resize(numCodeWords); 00732 for(int i=0; i<numCodeWords; i++) 00733 { 00734 double count = cvmGet(nDataCount, i, 0); 00735 std::vector<double> codes; 00736 codes.resize(nDims); 00737 for(int j=0; j<nDims; j++) 00738 { 00739 double sum = cvmGet(clusterMeans, i, j); 00740 //cvmSet(clusterMeans, i, j, sum/count); 00741 codes[j] = sum/count; 00742 } 00743 itsCodeWords[i] = codes; 00744 } 00745 00746 00747 cvReleaseMat(&data); 00748 cvReleaseMat(&labels); 00749 cvReleaseMat(&clusterMeans); 00750 cvReleaseMat(&nDataCount); 00751 } 00752 */ 00753 00754 void ObjRecBOF::readSaliencyFeatures(const char* dirname) 00755 { 00756 DIR *dp = opendir(dirname); 00757 if (dp == NULL) 00758 LFATAL("Can not open %s", dirname); 00759 dirent *dirp; 00760 while ((dirp = readdir(dp)) != NULL ) { 00761 std::string dirName(dirp->d_name); 00762 if (dirName.find(".dat") != std::string::npos) 00763 { 00764 char filename[255]; 00765 snprintf(filename, sizeof(filename), "%s/%s", dirname, dirp->d_name); 00766 Object obj; 00767 obj.name = std::string(dirp->d_name); 00768 obj.keypoints = readSaliencyKeypoints(filename); //read the keypoints 00769 itsObjects.push_back(obj); 00770 } 00771 } 00772 closedir(dp); 00773 } 00774 00775 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readSaliencyKeypoints(const char *filename) 00776 { 00777 00778 std::vector<Keypoint> keypoints; 00779 LINFO("Reading %s", filename); 00780 FILE *fp; 00781 fp = fopen(filename, "r"); 00782 if (!fp) 00783 { 00784 LINFO("Error reading %s", filename); 00785 return keypoints; 00786 } 00787 00788 int len = 42; 00789 while (fp != NULL) 00790 { 00791 Keypoint key; 00792 int keyNum, x, y; 00793 if (fscanf(fp, "%i %i %i", &keyNum, &x, &y) != 3) break; 00794 key.x = (double)x; 00795 key.y = (double)y; 00796 key.codeWord = -1; 00797 00798 for (int j = 0; j < len; j++) { 00799 float val = -1; 00800 if (fscanf(fp, "%f", &val) != 1) 00801 perror("Invalid keypoint file value."); 00802 key.fv.push_back(val); 00803 } 00804 keypoints.push_back(key); 00805 } 00806 00807 fclose(fp); 00808 return keypoints; 00809 } 00810 00811 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readSIFTKeypoints(const char *filename) 00812 { 00813 00814 std::vector<Keypoint> keypoints; 00815 printf("Reading %s\n", filename); 00816 FILE *fp; 00817 fp = fopen(filename, "r"); 00818 if (!fp) 00819 { 00820 LINFO("Error reading %s", filename); 00821 return keypoints; 00822 } 00823 00824 int i, j, num, len, val; 00825 00826 if (fscanf(fp, "%d %d", &num, &len) != 2) 00827 perror("Invalid keypoint file beginning."); 00828 00829 if (len != 128) 00830 perror("Keypoint descriptor length invalid (should be 128)."); 00831 for (i = 0; i < num; i++) { 00832 00833 Keypoint key; 00834 float x, y, scale, ori; 00835 if (fscanf(fp, "%f %f %f %f", &x, &y, &scale, &ori) != 4) break; 00836 key.x = x; 00837 key.y = y; 00838 key.scale = scale; 00839 key.ori = ori; 00840 00841 for (j = 0; j < len; j++) { 00842 if (fscanf(fp, "%d", &val) != 1 || val < 0 || val > 255) 00843 perror("Invalid keypoint file value."); 00844 key.fv.push_back((double)val); 00845 } 00846 keypoints.push_back(key); 00847 } 00848 00849 fclose(fp); 00850 00851 return keypoints; 00852 } 00853 00854 std::vector<ObjRecBOF::Keypoint> ObjRecBOF::readIlabSIFTKeypoints(const char *filename) 00855 { 00856 00857 std::vector<Keypoint> keypoints; 00858 printf("Reading %s\n", filename); 00859 FILE *fp; 00860 fp = fopen(filename, "r"); 00861 if (!fp) 00862 { 00863 LINFO("Error reading %s", filename); 00864 return keypoints; 00865 } 00866 00867 int len = 128; 00868 00869 while (fp != NULL) 00870 { 00871 Keypoint key; 00872 int x, y; 00873 if (fscanf(fp, "%i %i", &x, &y) != 2) break; 00874 key.x = x; 00875 key.y = y; 00876 00877 for (int j = 0; j < len; j++) { 00878 int val; 00879 if (fscanf(fp, "%d", &val) != 1 || val < 0 || val > 255) 00880 perror("Invalid keypoint file value."); 00881 key.fv.push_back((double)val); 00882 } 00883 keypoints.push_back(key); 00884 } 00885 00886 fclose(fp); 00887 00888 return keypoints; 00889 } 00890 00891 std::vector<std::vector<byte> > ObjRecBOF::getSiftDescriptor(const Image<float> &lum, 00892 const float x, const float y, const float s) 00893 { 00894 00895 Image<float> mag, ori; 00896 00897 //gradientSobel(lum, mag, ori, 3); 00898 gradient(lum, mag, ori); 00899 00900 Histogram OV(itsNumOriArray); 00901 00902 // 1. Calculate main orientation vector 00903 calculateOrientationVector(x, y, s, mag, ori, OV); 00904 00905 //Image<byte> histImg = OV.getHistogramImage(256, 256, 0, -1); 00906 00907 // 2. Create feature vector and keypoint for each significant 00908 // orientation peak: 00909 return createVectorsAndKeypoints(x, y, s, mag, ori, OV); 00910 00911 } 00912 00913 void ObjRecBOF::calculateOrientationVector(const float x, const float y, const float s, 00914 const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV) { 00915 00916 00917 // compute the effective blurring sigma corresponding to the 00918 // fractional scale s: 00919 const float sigma = s; 00920 00921 const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig); 00922 const int dimX = gradmag.getWidth(), dimY = gradmag.getHeight(); 00923 00924 const int xi = int(x + 0.5f); 00925 const int yi = int(y + 0.5f); 00926 00927 const int rad = int(3.0F * sig); 00928 const int rad2 = rad * rad; 00929 00930 00931 // search bounds: 00932 int starty = yi - rad; if (starty < 0) starty = 0; 00933 int stopy = yi + rad; if (stopy >= dimY) stopy = dimY-1; 00934 00935 // 1. Calculate orientation vector 00936 for (int ind_y = starty; ind_y <= stopy; ind_y ++) 00937 { 00938 // given that y, get the corresponding range of x values that 00939 // lie within the disk (and remain within the image): 00940 const int yoff = ind_y - yi; 00941 const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F); 00942 int startx = xi - bound; if (startx < 0) startx = 0; 00943 int stopx = xi + bound; if (stopx >= dimX) stopx = dimX-1; 00944 00945 for (int ind_x = startx; ind_x <= stopx; ind_x ++) 00946 { 00947 const float dx = float(ind_x) - x, dy = float(ind_y) - y; 00948 const float distSq = dx * dx + dy * dy; 00949 00950 // get gradient: 00951 const float gradVal = gradmag.getVal(ind_x, ind_y); 00952 00953 // compute the gaussian weight for this histogram entry: 00954 const float gaussianWeight = expf(distSq * inv2sig2); 00955 00956 // add this orientation to the histogram 00957 // [-pi ; pi] -> [0 ; 2pi] 00958 float angle = gradorie.getVal(ind_x, ind_y) + M_PI; 00959 00960 // [0 ; 2pi] -> [0 ; 36] 00961 angle = 0.5F * angle * itsNumOriArray / M_PI; 00962 while (angle < 0.0F) angle += itsNumOriArray; 00963 while (angle >= itsNumOriArray) angle -= itsNumOriArray; 00964 00965 OV.addValueInterp(angle, gaussianWeight * gradVal); 00966 } 00967 } 00968 00969 00970 // smooth the orientation histogram 3 times: 00971 for (int i = 0; i < 3; i++) OV.smooth(); 00972 } 00973 00974 // ###################################################################### 00975 00976 00977 std::vector<std::vector<byte> > ObjRecBOF::createVectorsAndKeypoints(const float x, 00978 const float y, const float s, 00979 const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV) 00980 { 00981 00982 const float sigma = s; //itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3)); 00983 00984 // find the max in the histogram: 00985 float maxPeakValue = OV.findMax(); 00986 00987 const int xi = int(x + 0.5f); 00988 const int yi = int(y + 0.5f); 00989 00990 uint numkp = 0; 00991 00992 std::vector<std::vector<byte> > descriptor; 00993 00994 // 2. Create feature vector and keypoint for each significant 00995 // orientation peak: 00996 for (int bin = 0; bin < itsNumOriArray; bin++) 00997 { 00998 // consider the peak centered around 'bin': 00999 const float midval = OV.getValue(bin); 01000 01001 // if current value much smaller than global peak or = to zero, forget it: 01002 if (midval == 0 || midval < 0.8F * maxPeakValue) continue; 01003 01004 // get value to the left of current value 01005 const float leftval = OV.getValue((bin == 0) ? itsNumOriArray-1 : bin-1); 01006 01007 // get value to the right of current value 01008 const float rightval = OV.getValue((bin == itsNumOriArray-1) ? 0 : bin+1); 01009 01010 // only consider local peaks: 01011 if (leftval > midval) continue; 01012 if (rightval > midval) continue; 01013 01014 // interpolate the values to get the orientation of the peak: 01015 // with f(x) = ax^2 + bx + c 01016 // f(-1) = x0 = leftval 01017 // f( 0) = x1 = midval 01018 // f(+1) = x2 = rightval 01019 // => a = (x0+x2)/2 - x1 01020 // b = (x2-x0)/2 01021 // c = x1 01022 // f'(x) = 0 => x = -b/2a 01023 const float a = 0.5f * (leftval + rightval) - midval; 01024 const float b = 0.5f * (rightval - leftval); 01025 float realangle = float(bin) - 0.5F * b / a; 01026 01027 realangle *= 2.0F * M_PI / itsNumOriArray; // [0:36] to [0:2pi] 01028 realangle -= M_PI; // [0:2pi] to [-pi:pi] 01029 01030 // ############ Create keypoint: 01031 01032 // compute the feature vector: 01033 FeatureVector fv; 01034 01035 const float sinAngle = sin(realangle), cosAngle = cos(realangle); 01036 01037 // check this scale 01038 const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8? 01039 const float gausssig = float(radius); // 1/2 width of descript window 01040 const float gaussfac = - 0.5F / (gausssig * gausssig); 01041 01042 01043 // Scan a window of diameter 2*radius+1 around the point of 01044 // interest, and we will cumulate local samples into a 4x4 grid 01045 // of bins, with interpolation. NOTE: rx and ry loop over a 01046 // square that is assumed centered around the point of interest 01047 // and rotated to the gradient orientation (realangle): 01048 01049 int scale = abs(int(s)); 01050 scale = scale > 5 ? 5 : scale; 01051 01052 for (int ry = -radius; ry <= radius; ry++) 01053 for (int rx = -radius; rx <= radius; rx++) 01054 { 01055 // rotate the point: 01056 const float newX = rx * cosAngle - ry * sinAngle; 01057 const float newY = rx * sinAngle + ry * cosAngle; 01058 01059 // get the coords in the image frame of reference: 01060 const float orgX = newX + float(xi); 01061 const float orgY = newY + float(yi); 01062 01063 // if outside the image, forget it: 01064 if (gradmag.coordsOk(orgX, orgY) == false) continue; 01065 01066 // find the fractional coords of the corresponding bin 01067 // (we subdivide our window into a 4x4 grid of bins): 01068 const float xf = 2.0F + 2.0F * float(rx) / float(radius); 01069 const float yf = 2.0F + 2.0F * float(ry) / float(radius); 01070 01071 01072 // find the Gaussian weight from distance to center and 01073 // get weighted gradient magnitude: 01074 const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac); 01075 const float weightedMagnitude = 01076 gaussFactor * gradmag.getValInterp(orgX, orgY); 01077 01078 // get the gradient orientation relative to the keypoint 01079 // orientation and scale it for 8 orientation bins: 01080 float gradAng = gradorie.getValInterp(orgX, orgY) - realangle; 01081 01082 gradAng=fmod(gradAng, 2*M_PI); //bring the range from 0 to M_PI 01083 01084 //convert from -M_PI to M_PI 01085 if (gradAng < 0.0) gradAng+=2*M_PI; //convert to -M_PI to M_PI 01086 if (gradAng >= M_PI) gradAng-=2*M_PI; 01087 //split to eight bins 01088 const float orient = (gradAng + M_PI) * 8 / (2 * M_PI); 01089 01090 /* 01091 //reflect the angle to convert from 0 to M_PI 01092 if (gradAng >= M_PI) gradAng-=M_PI; 01093 //split to four bins 01094 const float orient = (gradAng + M_PI) * 4 / (2 * M_PI); 01095 */ 01096 01097 // will be interpolated into 2 x 2 x 2 bins: 01098 01099 fv.addValue(xf, yf, orient, weightedMagnitude); 01100 01101 } 01102 01103 // normalize, clamp, scale and convert to byte: 01104 std::vector<byte> oriVec; 01105 01106 fv.toByteKey(oriVec); 01107 01108 double mag = fv.getMag(); 01109 if (oriVec.size() > 0 && mag > 0) 01110 descriptor.push_back(oriVec); 01111 //The key point 01112 01113 ++ numkp; 01114 01115 } 01116 return descriptor; 01117 } 01118 01119 01120 std::string ObjRecBOF::test(const Image<PixRGB<byte> > &img) 01121 { 01122 01123 01124 return std::string("Test"); 01125 } 01126 01127 01128 // ###################################################################### 01129 /* So things look consistent in everyone's emacs... */ 01130 /* Local Variables: */ 01131 /* indent-tabs-mode: nil */ 01132 /* End: */