/*!@file ObjRec/ObjRecSPM.C Object recognition using the spatial pyramid
   matching algorithm from Lazebnik, Schmid and Ponce */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/ObjRec/ObjRecSPM.C $
// $Id: ObjRecSPM.C 10794 2009-02-08 06:21:09Z itti $
//
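// A minimal usage sketch (illustrative only, not compiled as part of this
// file). It assumes the standard ModelManager / Raster setup used by the
// toolkit's test programs; the image files and the "myObject" label are
// placeholders:
//
//   #include "Component/ModelManager.H"
//   #include "Raster/Raster.H"
//   #include "Util/log.H"
//   #include "ObjRec/ObjRecSPM.H"
//
//   int main(int argc, const char** argv)
//   {
//     ModelManager manager("ObjRecSPM example");
//     nub::ref<ObjRecSPM> objRec(new ObjRecSPM(manager, "ObjRecSPM", "ObjRecSPM"));
//     manager.addSubComponent(objRec);
//     if (manager.parseCommandLine(argc, argv, "", 0, 0) == false) return 1;
//     manager.start();
//
//     // train on one labeled image, then predict on another
//     objRec->train(Raster::ReadRGB("train.png"), "myObject");
//     objRec->finalizeTraining();
//     LINFO("Predicted: %s", objRec->predict(Raster::ReadRGB("test.png")).c_str());
//
//     manager.stop();
//     return 0;
//   }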
#include "Component/OptionManager.H"
#include "Image/ColorOps.H"
#include "Image/FilterOps.H"
#include "Image/MathOps.H"
#include "Image/Kernels.H"
#include "Image/Convolutions.H"
#include "GUI/DebugWin.H"
#include "ObjRec/ObjRecSPM.H"
#include "SIFT/FeatureVector.H"

// ######################################################################
ObjRecSPM::ObjRecSPM(OptionManager& mgr, const std::string& descrName,
                     const std::string& tagName) :
  ModelComponent(mgr, descrName, tagName),
  itsNumOri(4),
  itsNumScales(2),
  itsNumOriArray(36), // number of bins in the SIFT orientation histogram
  itsObjects(0),
  itsUseSaliency(false)
{
  float stdmin  = 1.75F;
  float stdstep = 0.5F;
  int   fsmin   = 3;
  int   fsstep  = 1;

  // create the filters
  for (int scale = 0; scale < itsNumScales; scale++)
    for (int ori = 0; ori < itsNumOri; ori++)
    {
      Image<float> filter = dogFilter<float>(stdmin + stdstep * scale,
                                             (float)ori * 180.0F / (float)itsNumOri,
                                             fsmin + fsstep * scale);

      // normalize to zero mean:
      filter -= mean(filter);

      // normalize to unit sum-of-squares:
      filter /= sum(squared(filter));

      itsFilters.push_back(filter);
    }

  if (itsUseSaliency)
  {
    itsGetSaliency = nub::soft_ref<GetSaliency>(new GetSaliency(mgr));
    addSubComponent(itsGetSaliency);
  }
}

void ObjRecSPM::start2()
{
}

ObjRecSPM::~ObjRecSPM()
{
}

void ObjRecSPM::train(const Image<PixRGB<byte> > &img, const std::string label)
{
  Image<float> input = luminance(img); // convert to grayscale

  if (itsUseSaliency)
  {
    Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
    itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
    Image<float> smap = itsGetSaliency->getSalmap();
    smap = rescale(smap, img.getDims());
    inplaceNormalize(smap, 0.0F, 1.0F);
    input *= smap; // weight the input by the saliency map
  }

  //Descriptor desc = extractFeatures(input);
  Descriptor desc = extractSiftFeatures(input);

  uint objId = getObject(label);

  itsObjects[objId].model.push_back(desc);
}

void ObjRecSPM::finalizeTraining()
{
  LINFO("Training done");

  for (uint i = 0; i < itsObjects.size(); i++)
    for (uint j = 0; j < itsObjects[i].model.size(); j++)
    {
      printf("Obj %u model %u size %" ZU "\n",
             i, j, itsObjects[i].model[j].featureLevelHist.size());
    }
}

uint ObjRecSPM::getObject(const std::string name)
{
  // Find the object
  // TODO: could use a hash function here
  uint i = 0;
  for (i = 0; i < itsObjects.size(); i++)
    if (itsObjects[i].name == name)
      return i;

  // Object not found; create a new one
  Object obj;
  obj.id = i;
  obj.name = name;
  obj.model.clear();

  itsObjects.push_back(obj);

  return i;
}
ObjRecSPM::Descriptor ObjRecSPM::extractFeatures(const Image<float> &input)
{
  double normSum = 0; // the normalization constant

  // Compute the various low-level features
  ImageSet<float> featuresValues(itsFilters.size());
  for (uint i = 0; i < itsFilters.size(); i++)
  {
    Image<float> tmp = convolve(input, itsFilters[i], CONV_BOUNDARY_CLEAN); // no normalization
    //Image<float> tmp = convolveHmax(input, itsFilters[i]); // normalize by image energy
    tmp = abs(tmp);
    normSum += sum(tmp);
    featuresValues[i] = tmp;
  }

  // get the histograms
  Descriptor desc;
  for (uint feature = 0; feature < featuresValues.size(); feature++) // for each feature
  {
    Image<float> featureVal = featuresValues[feature];

    std::vector<Histogram> levelHists;
    for (int level = 0; level < 4; level++)
    {
      int levelSize = 1 << level; // the grid is levelSize x levelSize at this level
      Histogram hist(levelSize*levelSize);

      int xSpace = (featureVal.getWidth()/levelSize)+1;
      int ySpace = (featureVal.getHeight()/levelSize)+1;

      for (int y = 0; y < featureVal.getHeight(); y++)
        for (int x = 0; x < featureVal.getWidth(); x++)
        {
          // bin index = grid column + grid row * grid width
          int binPos = (int)(x/xSpace + levelSize*(y/ySpace));
          hist.addValue(binPos, featureVal.getVal(x,y));
        }
      hist.normalize(normSum); // normalize by the sum of all feature outputs
      levelHists.push_back(hist);
    }
    desc.featureLevelHist.push_back(levelHists);
  }

  return desc;
}
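// Worked example of the grid arithmetic above (illustrative numbers): for a
// 640x480 response image at level 2 the grid is 4x4, so
//   xSpace = 640/4 + 1 = 161  and  ySpace = 480/4 + 1 = 121,
// and the pixel at (x,y) = (300,200) falls in column 300/161 = 1 and
// row 200/121 = 1, i.e. bin 1 + 4*1 = 5 of the 16 level-2 bins.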
ObjRecSPM::Descriptor ObjRecSPM::extractSiftFeatures(const Image<float> &input)
{
  SHOWIMG(input);

  // extract a SIFT descriptor on a dense grid, every 10 pixels
  Descriptor desc;
  for (int y = 10; y < input.getHeight()-10; y += 10)
    for (int x = 10; x < input.getWidth()-10; x += 10)
    {
      SiftKeypoint kp;
      kp.x = x;
      kp.y = y;
      kp.fv = getSiftDescriptor(input, x, y, 2);
      desc.siftDescriptors.push_back(kp);
      LINFO("%ix%i", x, y);
      //for(uint i=0; i<fv.size(); i++)
      //{
      //  FeatureVector fvtmp;
      //  SHOWIMG(fvtmp.getFeatureVectorImage(fv[i]));
      //}
    }

  return desc;
}

std::string ObjRecSPM::predict(const Image<PixRGB<byte> > &img)
{
  Image<float> input = luminance(img); // convert to grayscale

  if (itsUseSaliency)
  {
    Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
    itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
    Image<float> smap = itsGetSaliency->getSalmap();
    smap = rescale(smap, img.getDims());
    inplaceNormalize(smap, 0.0F, 1.0F);
    input *= smap; // weight the input by the saliency map
  }

  //Descriptor desc = extractFeatures(input);
  Descriptor desc = extractSiftFeatures(input);

  // find matches

  // Descriptor objDec = itsObjects[0].model[0];

  for (uint kp_i = 0; kp_i < desc.siftDescriptors.size(); kp_i++)
  {
    SiftKeypoint kp = desc.siftDescriptors[kp_i];

    for (uint fv_i = 0; fv_i < kp.fv.size(); fv_i++)
    {
      std::vector<byte> fv = kp.fv[fv_i];
      FeatureVector tmpFv;

      SHOWIMG(tmpFv.getFeatureVectorImage(fv));
    }
  }

  int objId = findObject(desc);

  if (objId != -1)
    return itsObjects[objId].name;

  return std::string("Unknown");
}

// NN search
int ObjRecSPM::findObject(const Descriptor &desc)
{
  int objId = -1;

  double minDist = std::numeric_limits<double>::max();
  // Find the object with the closest distance
  for (uint i = 0; i < itsObjects.size(); i++)
  {
    // Find the closest model match to the data
    for (uint j = 0; j < itsObjects[i].model.size(); j++)
    {
      double dist = matchDescriptor(itsObjects[i].model[j], desc);
      if (dist < minDist)
      {
        minDist = dist;
        objId = i;
      }
    }
  }

  return objId;
}

double ObjRecSPM::matchDescriptor(const Descriptor &descA, const Descriptor &descB)
{
  double sum = 0;
  for (uint feature = 0; feature < descA.featureLevelHist.size(); feature++)
  {
    sum += matchKernel(descA.featureLevelHist[feature], descB.featureLevelHist[feature]);
  }

  return sum;
}

double ObjRecSPM::matchKernel(const std::vector<Histogram>& A, const std::vector<Histogram>& B)
{
  if (B.size() > A.size())
    LFATAL("Incompatible histograms");
  double dist = 0;

  // Weighted Euclidean distance between the per-level histograms
  for (uint level = 0; level < A.size(); level++)
  {
    Histogram modelHist = A[level];
    Histogram testHist  = B[level];

    double weight;
    if (level == 0)
      weight = 1.0F/(double)(1<<(A.size()));
    else
      weight = 1.0F/(double)(1<<(A.size()-level+1));
    dist += weight*modelHist.getDistance(testHist);
  }

  // histogram intersection
  //for(uint i=0; i<A.size(); i++)
  //  dist += std::min(A[i],B[i]);

  // KL distance
  //for(uint i=0; i<A.size(); i++)
  //{
  //  // Ensure that no probability is set to 0
  //  double pA = (A[i] > 0) ? A[i] : 0.0001;
  //  double qB = (B[i] > 0) ? B[i] : 0.0001;
  //
  //  dist += pA * log(pA/qB);
  //}

  return dist;
}
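// The level weights computed above follow the geometric weighting of spatial
// pyramid matching: coarse levels count less than fine ones. For the
// four-level pyramid built by extractFeatures() (A.size() == 4) the loop
// yields
//   level 0 (1x1 grid): 1/2^4 = 1/16,   level 1 (2x2): 1/2^4 = 1/16,
//   level 2 (4x4 grid): 1/2^3 = 1/8,    level 3 (8x8): 1/2^2 = 1/4,
// so the finest grid contributes most to the distance.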
std::vector<std::vector<byte> > ObjRecSPM::getSiftDescriptor(const Image<float> &lum,
    const float x, const float y, const float s)
{
  Image<float> mag, ori;
  gradientSobel(lum, mag, ori, 3);

  Histogram OV(36);

  // 1. Calculate main orientation vector
  calculateOrientationVector(x, y, s, mag, ori, OV);

  //Image<byte> histImg = OV.getHistogramImage(256, 256, 0, -1);

  // 2. Create feature vector and keypoint for each significant
  //    orientation peak:
  return createVectorsAndKeypoints(x, y, s, mag, ori, OV);
}

void ObjRecSPM::calculateOrientationVector(const float x, const float y, const float s,
    const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
{
  // compute the effective blurring sigma corresponding to the
  // fractional scale s:
  const float sigma = s;

  const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig);
  const int dimX = gradmag.getWidth(), dimY = gradmag.getHeight();

  const int xi = int(x + 0.5f);
  const int yi = int(y + 0.5f);

  const int rad = int(3.0F * sig);
  const int rad2 = rad * rad;

  // search bounds:
  int starty = yi - rad; if (starty < 0) starty = 0;
  int stopy  = yi + rad; if (stopy >= dimY) stopy = dimY-1;

  // 1. Calculate orientation vector
  for (int ind_y = starty; ind_y <= stopy; ind_y ++)
  {
    // given that y, get the corresponding range of x values that
    // lie within the disk (and remain within the image):
    const int yoff = ind_y - yi;
    const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F);
    int startx = xi - bound; if (startx < 0) startx = 0;
    int stopx  = xi + bound; if (stopx >= dimX) stopx = dimX-1;

    for (int ind_x = startx; ind_x <= stopx; ind_x ++)
    {
      const float dx = float(ind_x) - x, dy = float(ind_y) - y;
      const float distSq = dx * dx + dy * dy;

      // get gradient:
      const float gradVal = gradmag.getVal(ind_x, ind_y);

      // compute the gaussian weight for this histogram entry:
      const float gaussianWeight = expf(distSq * inv2sig2);

      // add this orientation to the histogram
      // [-pi ; pi] -> [0 ; 2pi]
      float angle = gradorie.getVal(ind_x, ind_y) + M_PI;

      // [0 ; 2pi] -> [0 ; 36]
      angle = 0.5F * angle * itsNumOriArray / M_PI;
      while (angle < 0.0F) angle += itsNumOriArray;
      while (angle >= itsNumOriArray) angle -= itsNumOriArray;

      OV.addValueInterp(angle, gaussianWeight * gradVal);
    }
  }

  // smooth the orientation histogram 3 times:
  for (int i = 0; i < 3; i++) OV.smooth();
}
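// Concrete numbers for the orientation window above, for the scale s = 2 that
// extractSiftFeatures() passes in: sig = 1.5*2 = 3, so gradients are pooled
// over a disk of radius rad = int(3*3) = 9 pixels around (x,y), each sample
// weighted by exp(-d^2/(2*3^2)) times its gradient magnitude. A gradient
// orientation of -pi/2 maps to (-pi/2 + pi)*36/(2*pi) = bin 9 of the 36-bin
// histogram.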
// ######################################################################
std::vector<std::vector<byte> > ObjRecSPM::createVectorsAndKeypoints(const float x,
    const float y, const float s,
    const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
{
  const float sigma = s; //itsSigma * powf(2.0F, s / float(itsLumBlur.size() - 3));

  // find the max in the histogram:
  float maxPeakValue = OV.findMax();

  const int xi = int(x + 0.5f);
  const int yi = int(y + 0.5f);

  uint numkp = 0;

  std::vector<std::vector<byte> > descriptor;

  // 2. Create feature vector and keypoint for each significant
  //    orientation peak:
  for (int bin = 0; bin < itsNumOriArray; bin++)
  {
    // consider the peak centered around 'bin':
    const float midval = OV.getValue(bin);

    // if current value much smaller than global peak, forget it:
    if (midval < 0.8F * maxPeakValue) continue;

    // get value to the left of current value
    const float leftval = OV.getValue((bin == 0) ? itsNumOriArray-1 : bin-1);

    // get value to the right of current value
    const float rightval = OV.getValue((bin == itsNumOriArray-1) ? 0 : bin+1);

    // only consider local peaks:
    if (leftval > midval) continue;
    if (rightval > midval) continue;

    // interpolate the values to get the orientation of the peak:
    // with f(x) = ax^2 + bx + c
    //   f(-1) = x0 = leftval
    //   f( 0) = x1 = midval
    //   f(+1) = x2 = rightval
    //   => a = (x0+x2)/2 - x1
    //      b = (x2-x0)/2
    //      c = x1
    //   f'(x) = 0 => x = -b/2a
    const float a = 0.5f * (leftval + rightval) - midval;
    const float b = 0.5f * (rightval - leftval);
    float realangle = float(bin) - 0.5F * b / a;

    realangle *= 2.0F * M_PI / itsNumOriArray; // [0:36] to [0:2pi]
    realangle -= M_PI;                         // [0:2pi] to [-pi:pi]

    // ############ Create keypoint:

    // compute the feature vector:
    FeatureVector fv;

    const float sinAngle = sin(realangle), cosAngle = cos(realangle);

    // check this scale
    const int radius = int(5.0F * sigma + 0.5F); // NOTE: Lowe uses radius=8?
    const float gausssig = float(radius);        // 1/2 width of descriptor window
    const float gaussfac = - 0.5F / (gausssig * gausssig);

    // Scan a window of diameter 2*radius+1 around the point of
    // interest, and cumulate local samples into a 4x4 grid
    // of bins, with interpolation. NOTE: rx and ry loop over a
    // square that is assumed centered around the point of interest
    // and rotated to the gradient orientation (realangle):

    int scale = abs(int(s));
    scale = scale > 5 ? 5 : scale;

    for (int ry = -radius; ry <= radius; ry++)
      for (int rx = -radius; rx <= radius; rx++)
      {
        // rotate the point:
        const float newX = rx * cosAngle - ry * sinAngle;
        const float newY = rx * sinAngle + ry * cosAngle;

        // get the coords in the image frame of reference:
        const float orgX = newX + float(xi);
        const float orgY = newY + float(yi);

        // if outside the image, forget it:
        if (gradmag.coordsOk(orgX, orgY) == false) continue;

        // find the fractional coords of the corresponding bin
        // (we subdivide our window into a 4x4 grid of bins):
        const float xf = 2.0F + 2.0F * float(rx) / float(radius);
        const float yf = 2.0F + 2.0F * float(ry) / float(radius);

        // find the Gaussian weight from distance to center and
        // get weighted gradient magnitude:
        const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac);
        const float weightedMagnitude =
          gaussFactor * gradmag.getValInterp(orgX, orgY);

        // get the gradient orientation relative to the keypoint
        // orientation and scale it for 8 orientation bins:
        float gradAng = gradorie.getValInterp(orgX, orgY) - realangle;

        // wrap the relative orientation into [-M_PI, M_PI):
        gradAng = fmod(gradAng, 2*M_PI);        // now in (-2*M_PI, 2*M_PI)
        if (gradAng < 0.0) gradAng += 2*M_PI;   // now in [0, 2*M_PI)
        if (gradAng >= M_PI) gradAng -= 2*M_PI; // now in [-M_PI, M_PI)

        // split into eight bins
        const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);

        /*
        //reflect the angle to convert from 0 to M_PI
        if (gradAng >= M_PI) gradAng-=M_PI;
        //split to four bins
        const float orient = (gradAng + M_PI) * 4 / (2 * M_PI);
        */

        // will be interpolated into 2 x 2 x 2 bins:
        fv.addValue(xf, yf, orient, weightedMagnitude);
      }

    // normalize, clamp, scale and convert to byte:
    std::vector<byte> oriVec;
    fv.toByteKey(oriVec);

    // keep this keypoint's descriptor if it is non-empty:
    double mag = fv.getMag();
    if (oriVec.size() > 0 && mag > 0)
      descriptor.push_back(oriVec);

    ++ numkp;
  }
  return descriptor;
}
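// Worked example of the peak interpolation above (illustrative values): with
// leftval = 10, midval = 20, rightval = 14 the parabola fit gives
//   a = 0.5*(10 + 14) - 20 = -8  and  b = 0.5*(14 - 10) = 2,
// so the peak sits at bin - 0.5*b/a = bin + 0.125 of the 36-bin histogram,
// which is then mapped to [-pi,pi) to give the keypoint orientation.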
// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */