00001 /*!@file FeatureMatching/DPM.C */ 00002 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00006 // by the University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Lior Elazary 00035 // $HeadURL$ 00036 // $Id$ 00037 // 00038 00039 #include "FeatureMatching/DPM.H" 00040 #include "Image/DrawOps.H" 00041 #include "Image/MathOps.H" 00042 #include "Image/Kernels.H" 00043 #include "Image/CutPaste.H" 00044 #include "Image/ColorOps.H" 00045 #include "Image/FilterOps.H" 00046 #include "Image/ShapeOps.H" 00047 #include "SIFT/FeatureVector.H" 00048 #include "GUI/DebugWin.H" 00049 #include "Image/Layout.H" 00050 #include "Image/Convolutions.H" 00051 00052 00053 00054 00055 DPM::DPM() : 00056 itsInterval(10) 00057 { 00058 itsThreadServer.reset(new WorkThreadServer("DPM", 10)); 00059 } 00060 00061 00062 DPM::~DPM() 00063 { 00064 } 00065 00066 00067 void DPM::computeFeaturePyramid(const Image<PixRGB<byte> >& img) 00068 { 00069 int numBin = 8; 00070 00071 int width = img.getWidth(); 00072 int height = img.getHeight(); 00073 double sc = pow(2,1.0F/itsInterval); 00074 int maxScale = 1 + floor(log(std::min(width,height)/(5*numBin))/log(sc)); 00075 00076 HOG hog; 00077 00078 itsFeaturesPyramid.clear(); 00079 itsFeaturesPyramid.resize(maxScale + itsInterval); 00080 00081 00082 for(int i=0; i<itsInterval; i++) 00083 { 00084 double scale = 1.0/pow(sc,i); 00085 int width = (int)round((float)img.getWidth()*scale); 00086 int height = (int)round((float)img.getHeight()*scale); 00087 Image<PixRGB<byte> > scaled = rescale(img, width, height); 00088 00089 //First 2x interval 00090 itsFeaturesPyramid[i].features = hog.getFeatures(scaled, numBin/2); 00091 itsFeaturesPyramid[i].scale = 2*scale; 00092 itsFeaturesPyramid[i].bins = numBin/2; 00093 00094 //second 2x interval 00095 itsFeaturesPyramid[i+itsInterval].features = hog.getFeatures(scaled, numBin); 00096 itsFeaturesPyramid[i+itsInterval].scale = scale; 00097 itsFeaturesPyramid[i+itsInterval].bins = numBin; 00098 00099 //Remaining intervals 00100 for(int j= i+itsInterval; j < maxScale; j+=itsInterval) 00101 { 00102 Image<PixRGB<byte> > scaled2 = rescale(scaled, 00103 scaled.getWidth()/2, scaled.getHeight()/2); 00104 scaled = scaled2; 00105 00106 itsFeaturesPyramid[j+itsInterval].features = hog.getFeatures(scaled, numBin); 00107 itsFeaturesPyramid[j+itsInterval].scale = 0.5*itsFeaturesPyramid[j].scale; 00108 itsFeaturesPyramid[j+itsInterval].bins = numBin; 00109 } 00110 } 00111 00112 //Padd the pyramid 00113 int xpadd = 11; 00114 int ypadd = 6; 00115 00116 for(uint i=0; i<itsFeaturesPyramid.size(); i++) 00117 { 00118 ImageSet<double>& feature = itsFeaturesPyramid[i].features; 00119 Dims dims(feature[0].getWidth() + (xpadd+1)*2, 00120 feature[0].getHeight() + (ypadd+1)*2); 00121 ImageSet<double> newFeature(feature.size(), dims, ZEROS); 00122 for(uint f=0; f<newFeature.size(); f++) 00123 inplacePaste(newFeature[f], feature[f], Point2D<int>(xpadd+1, ypadd+1)); 00124 00125 //Write the boundary occlusion in the last feature 00126 int fID = 31; 00127 for(int y=0; y<ypadd+1; y++) 00128 for(int x=0; x<newFeature[fID].getWidth(); x++) 00129 newFeature[fID].setVal(x,y, 1); 00130 00131 for(int y=newFeature[fID].getHeight()-ypadd; y<newFeature[fID].getHeight(); y++) 00132 for(int x=0; x<newFeature[fID].getWidth(); x++) 00133 newFeature[fID].setVal(x,y, 1); 00134 00135 for(int y=ypadd+1; y < newFeature[fID].getHeight()-ypadd; y++) 00136 { 00137 for(int x=0; x<xpadd+1; x++) 00138 newFeature[fID].setVal(x,y, 1); 00139 for(int x=newFeature[fID].getWidth()-xpadd; x< newFeature[fID].getWidth(); x++) 00140 newFeature[fID].setVal(x,y, 1); 00141 } 00142 00143 itsFeaturesPyramid[i].features = newFeature; 00144 00145 00146 } 00147 00148 } 00149 00150 void DPM::convolveModel() 00151 { 00152 00153 itsModelScores.clear(); 00154 for(uint level=itsInterval; level<itsFeaturesPyramid.size(); level++) 00155 { 00156 LINFO("Level %i", level); 00157 00158 std::vector<rutz::shared_ptr<DPMJob> > itsJobs; 00159 00160 for(size_t c=0; c<itsModel.components.size(); c++) 00161 { 00162 itsJobs.push_back(rutz::make_shared(new DPMJob(this, c, level))); 00163 itsThreadServer->enqueueJob(itsJobs.back()); 00164 } 00165 00166 //Wait for jobs to finish 00167 for(size_t i=0; i<itsJobs.size(); i++) 00168 itsJobs[i]->wait(); 00169 00170 //Get the max score and the component that has that score 00171 Image<double> maxScore; 00172 Image<int> maxComp; 00173 00174 for(size_t i=0; i<itsJobs.size(); i++) 00175 { 00176 Image<double> score = itsJobs[i]->getScore(); 00177 00178 if (maxScore.initialized()) 00179 { 00180 //Since the levels in the pyramid could be different (due to a difference in size of the rootFilter) 00181 //Only add the smaller amount 00182 int w = std::min(score.getWidth(), maxScore.getWidth()); 00183 int h = std::min(score.getHeight(), maxScore.getHeight()); 00184 00185 int maxScoreWidth = maxScore.getWidth(); 00186 int scoreWidth = score.getWidth(); 00187 00188 Image<double>::const_iterator scorePtr = score.begin(); 00189 Image<double>::iterator maxScorePtr = maxScore.beginw(); 00190 for(int y=0; y<h; y++) 00191 for(int x=0; x<w; x++) 00192 if (scorePtr[y*scoreWidth + x] > maxScorePtr[y*maxScoreWidth + x]) 00193 { 00194 maxScorePtr[y*maxScoreWidth + x] = scorePtr[y*scoreWidth + x]; 00195 maxComp[y*maxScoreWidth + x] = itsJobs[i]->getComponent(); 00196 } 00197 } else { 00198 maxScore = score; 00199 maxComp = Image<int>(maxScore.getDims(), NO_INIT); 00200 maxComp.clear(-1); 00201 } 00202 } 00203 itsModelScores.push_back(ModelScore(maxScore, maxComp, level)); 00204 } 00205 00206 } 00207 00208 Image<double> DPM::convolveComponent(const int comp, const int level) 00209 { 00210 ImageSet<double>& imgFeatures = itsFeaturesPyramid[level].features; 00211 00212 //Convolve the root filter 00213 ModelComponent& component = itsModel.components[comp]; 00214 ImageSet<double>& rootFeatures = component.rootFilter; 00215 Image<double> score = convolveFeatures(imgFeatures, rootFeatures); 00216 00217 std::vector<double> deformation(4); 00218 deformation[0] = 1000; 00219 deformation[1] = 0; 00220 deformation[2] = 1000; 00221 deformation[3] = 0; 00222 00223 Image<double> scoreDef = distanceTrans(score, deformation); 00224 score = scoreDef; 00225 int scoreW = score.getWidth(); 00226 int scoreH = score.getHeight(); 00227 00228 score += component.offset; 00229 00230 //Convolve the parts at a finer resolution (2x) 00231 for(size_t p=0; p<component.parts.size(); p++) 00232 { 00233 ImageSet<double>& partImgFeatures = itsFeaturesPyramid[level-itsInterval].features; 00234 ModelPart& part = component.parts[p]; 00235 ImageSet<double>& partFeatures = part.features; 00236 Image<double> partScore = convolveFeatures(partImgFeatures, partFeatures); 00237 00238 //Apply the deformation 00239 Image<double> defScore = distanceTrans(partScore, part.deformation); 00240 Point2D<float> anchor = part.anchor + Point2D<int>(1,1) - Point2D<int>(11,6); //Pyramid offset 00241 00242 int defScoreW = defScore.getWidth(); 00243 int defScoreH = defScore.getHeight(); 00244 //Add the score to the rootFilter by shifting the position 00245 for(int y=0; y<scoreH; y++) 00246 for(int x=0; x<scoreW; x++) 00247 { 00248 int px = anchor.i + x*2; 00249 int py = anchor.j + y*2; 00250 if (px > 0 && px < defScoreW && 00251 py > 0 && py < defScoreH) 00252 score[y*scoreW + x] += defScore[py*defScoreW + px]; 00253 } 00254 00255 } 00256 00257 return score; 00258 } 00259 00260 std::vector<DPM::Detection> DPM::getBoundingBoxes(const float thresh) 00261 { 00262 std::vector<Detection> detections; //Scores of the detections 00263 00264 for(uint i=0; i<itsModelScores.size(); i++) 00265 { 00266 //Find detection over the threshold 00267 Image<double>::const_iterator scorePtr = itsModelScores[i].score.begin(); 00268 const int w = itsModelScores[i].score.getWidth(); 00269 const int h = itsModelScores[i].score.getHeight(); 00270 00271 for(int y=0; y<h; y++) 00272 for(int x=0; x<w; x++) 00273 { 00274 if (scorePtr[y*w+x] > thresh) 00275 { 00276 int level = itsModelScores[i].level; 00277 float scale = (float)itsFeaturesPyramid[level].bins/itsFeaturesPyramid[level].scale; 00278 int comp = itsModelScores[i].component.getVal(x,y); 00279 int paddX = 11; 00280 int paddY = 6; 00281 ModelComponent& component = itsModel.components[comp]; 00282 ImageSet<double>& rootFeatures = component.rootFilter; 00283 Dims size = rootFeatures[0].getDims(); 00284 int x1 = (x - paddX)*scale+1; 00285 int y1 = (y - paddY)*scale+1; 00286 00287 int x2 = x1 + size.w()*scale - 1; 00288 int y2 = y1 + size.h()*scale - 1; 00289 00290 Rectangle rect = Rectangle::tlbrI(y1,x1, y2, x2); 00291 detections.push_back(Detection(rect, scorePtr[y*w+x], comp)); 00292 } 00293 } 00294 } 00295 00296 return detections; 00297 00298 } 00299 00300 std::vector<DPM::Detection> DPM::filterDetections(const std::vector<Detection>& detections, const float overlap) 00301 { 00302 00303 std::vector<Detection> filteredDetections; 00304 00305 //Non-maximum suppression. 00306 //Greedily select high-scoring detections and skip detections 00307 //that are significantly covered by a previously selected detection. 00308 00309 //This alg still need to be verified for correctness. 00310 for(uint i=0; i<detections.size(); i++) 00311 { 00312 //See if we overlap with this detection 00313 bool isOverlap = false; 00314 for(uint j=0; j<filteredDetections.size(); j++) 00315 { 00316 if (detections[i].bb.getOverlapRatio(filteredDetections[j].bb) > overlap) 00317 { 00318 isOverlap = true; 00319 //If we overlap with this one, then check if this has a better score 00320 if (detections[i].score > filteredDetections[j].score) 00321 filteredDetections[j] = detections[i]; 00322 } 00323 } 00324 00325 if (!isOverlap) 00326 filteredDetections.push_back(detections[i]); 00327 } 00328 00329 return filteredDetections; 00330 00331 } 00332 00333 void DPM::dtHelper(const Image<double>::const_iterator src, 00334 Image<double>::iterator dst, 00335 Image<int>::iterator ptr, 00336 int step, 00337 int s1, int s2, int d1, int d2, 00338 double a, double b) 00339 { 00340 00341 if (d2 >= d1) //Check if we are out of bounds 00342 { 00343 int d = (d1+d2) >> 1; 00344 int s = s1; 00345 //Get the max value using the quadratic function while iterating from s1+1 to s2 00346 for (int p = s1+1; p <= s2; p++) 00347 { 00348 if (src[s*step] - a*squareOf(d-s) - b*(d-s) < 00349 src[p*step] - a*squareOf(d-p) - b*(d-p)) 00350 s = p; 00351 } 00352 dst[d*step] = src[s*step] - a*squareOf(d-s) - b*(d-s); 00353 ptr[d*step] = s; 00354 00355 //Iteratively call the next locations 00356 dtHelper(src, dst, ptr, step, s1, s, d1, d-1, a, b); 00357 dtHelper(src, dst, ptr, step, s, s2, d+1, d2, a, b); 00358 } 00359 } 00360 00361 Image<double> DPM::distanceTrans(const Image<double>& score, 00362 const std::vector<double>& deformation) 00363 { 00364 double ax = deformation[0]; 00365 double bx = deformation[1]; 00366 double ay = deformation[2]; 00367 double by = deformation[3]; 00368 00369 Image<double> defScore(score.getDims(), ZEROS); 00370 Image<int> scoreIx(score.getDims(), ZEROS); 00371 Image<int> scoreIy(score.getDims(), ZEROS); 00372 Image<int>::iterator ptrIx = scoreIx.beginw(); 00373 Image<int>::iterator ptrIy = scoreIy.beginw(); 00374 00375 Image<double> tmpM(score.getDims(), ZEROS); 00376 Image<int> tmpIx(score.getDims(), ZEROS); 00377 Image<int> tmpIy(score.getDims(), ZEROS); 00378 00379 const Image<double>::const_iterator src = score.begin(); 00380 Image<double>::iterator dst = defScore.beginw(); 00381 Image<double>::iterator ptrTmpM = tmpM.beginw(); 00382 Image<int>::iterator ptrTmpIx = tmpIx.beginw(); 00383 Image<int>::iterator ptrTmpIy = tmpIy.beginw(); 00384 00385 int w = score.getWidth(); 00386 int h = score.getHeight(); 00387 00388 00389 for(int x=0; x<w; x++) 00390 dtHelper(src+x, ptrTmpM+x, ptrTmpIx+x, w, 00391 0, h-1, 0, h-1, ay, by); 00392 00393 for(int y=0; y<h; y++) 00394 dtHelper(ptrTmpM+y*w, dst+y*w, ptrTmpIy+y*w, 1, 00395 0, w-1, 0, w-1, ax, bx); 00396 00397 for(int y=0; y<h; y++) 00398 for(int x=0; x<w; x++) 00399 { 00400 int p = y*w+x; 00401 ptrIy[p] = ptrTmpIy[p]; 00402 ptrIx[p] = ptrTmpIx[y*w+ptrTmpIy[p]]; 00403 } 00404 00405 00406 return defScore; 00407 00408 } 00409 00410 Image<double> DPM::convolveFeatures(const ImageSet<double>& imgFeatures, 00411 const ImageSet<double>& filterFeatures) 00412 { 00413 if (imgFeatures.size() == 0) 00414 return Image<double>(); 00415 00416 ASSERT(imgFeatures.size() == filterFeatures.size()); 00417 00418 //Compute size of output 00419 int w = imgFeatures[0].getWidth() - filterFeatures[0].getWidth() + 1; 00420 int h = imgFeatures[0].getHeight() - filterFeatures[0].getHeight() + 1; 00421 00422 int filtWidth = filterFeatures[0].getWidth(); 00423 int filtHeight = filterFeatures[0].getHeight(); 00424 int srcWidth = imgFeatures[0].getWidth(); 00425 00426 Image<double> score(w,h, ZEROS); 00427 00428 for(uint i=0; i<imgFeatures.size(); i++) 00429 { 00430 Image<double>::const_iterator srcPtr = imgFeatures[i].begin(); 00431 Image<double>::const_iterator filtPtr = filterFeatures[i].begin(); 00432 Image<double>::iterator dstPtr = score.beginw(); 00433 00434 for(int y=0; y<h; y++) 00435 for(int x=0; x<w; x++) 00436 { 00437 //Convolve the filter 00438 double val = 0; 00439 for(int yp = 0; yp < filtHeight; yp++) 00440 for(int xp = 0; xp < filtWidth; xp++) 00441 { 00442 val += srcPtr[(y+yp)*srcWidth + (x+xp)] * filtPtr[yp*filtWidth + xp]; 00443 } 00444 00445 *(dstPtr++) += val; 00446 } 00447 } 00448 00449 return score; 00450 } 00451 00452 Image<PixRGB<byte> > DPM::getModelImage() 00453 { 00454 int lineLength = 20; 00455 00456 Layout<PixRGB<byte> > modelImg; 00457 00458 HOG hog; 00459 for(size_t c=0; c<itsModel.components.size(); c++) 00460 { 00461 00462 ModelComponent& component = itsModel.components[c]; 00463 Image<PixRGB<byte> > compImage = 00464 hog.getHistogramImage(component.rootFilter); 00465 compImage = rescale(compImage, compImage.getDims()*2); 00466 Image<PixRGB<byte> > partsImage = compImage; 00467 Image<PixRGB<byte> > defImage(compImage.getDims(), ZEROS); 00468 00469 //Paste the parts 00470 for(size_t p=0; p<component.parts.size(); p++) 00471 { 00472 ModelPart& part = component.parts[p]; 00473 Image<PixRGB<byte> > partImg = 00474 hog.getHistogramImage(part.features); 00475 00476 //Paste into the root filter 00477 Point2D<int> topLeft = Point2D<int>(part.anchor*lineLength); 00478 inplacePaste(partsImage, partImg, topLeft); 00479 //Draw a border around the part 00480 drawRect(partsImage, Rectangle(topLeft, partImg.getDims()), 00481 PixRGB<byte>(255,0,0)); 00482 00483 //Draw the deformation 00484 Image<double> defImg(partImg.getDims(), ZEROS); 00485 00486 float defScale = 500; 00487 for(int y=0; y<defImg.getHeight(); y++) 00488 for(int x=0; x<defImg.getWidth(); x++) 00489 { 00490 double px = (double)((defImg.getWidth()/2) - x)/20.0; 00491 double py = (double)((defImg.getHeight()/2) - y)/20.0; 00492 00493 double val = px*px * part.deformation[0] + 00494 px * part.deformation[1] + 00495 py*py * part.deformation[2] + 00496 py * part.deformation[3]; 00497 defImg.setVal(x,y,val*defScale); 00498 00499 } 00500 inplacePaste(defImage, toRGB((Image<byte>)defImg), topLeft); 00501 //Draw a border around the part 00502 drawRect(defImage, Rectangle(topLeft, partImg.getDims()), 00503 PixRGB<byte>(255,0,0)); 00504 00505 } 00506 Layout<PixRGB<byte> > compDisp = hcat(compImage, partsImage); 00507 compDisp = hcat(compDisp, defImage); 00508 00509 modelImg = vcat(modelImg, compDisp); 00510 } 00511 00512 return modelImg.render(); 00513 00514 } 00515 00516 void DPM::readModel(const char* fileName) 00517 { 00518 FILE *fp = fopen(fileName, "rb"); 00519 if (fp == NULL) 00520 LFATAL("Can not open model file (%s)", fileName); 00521 00522 LINFO("Reading model from %s", fileName); 00523 00524 int numComponents; 00525 if (fread(&numComponents, sizeof(int), 1, fp) != 1) 00526 LFATAL("Invalid model file"); 00527 LINFO("Num Components %i", numComponents); 00528 00529 itsModel = Model(); 00530 00531 for(int c=0; c<numComponents; c++) 00532 { 00533 ModelComponent modelComponent; 00534 00535 //Get the root filter 00536 int filterDims[3]; 00537 if(fread(filterDims, sizeof(int), 3, fp) != 3) 00538 LFATAL("Invalid model file"); 00539 int width = filterDims[1]; 00540 int height = filterDims[0]; 00541 int numFeatures = filterDims[2]; 00542 00543 ImageSet<double> features; 00544 for(int feature=0; feature<numFeatures; feature++) 00545 { 00546 Image<double> featureMap(width, height, NO_INIT); 00547 if (fread(featureMap.getArrayPtr(), sizeof(double), width*height, fp) != (uint)(width*height)) 00548 LFATAL("Invalid model file"); 00549 features.push_back(featureMap); 00550 } 00551 00552 //get the offset 00553 double offset = 0; 00554 if (fread(&offset, sizeof(double), 1, fp) != 1) 00555 LFATAL("Invalid model file"); 00556 modelComponent.offset = offset; 00557 00558 00559 modelComponent.rootFilter = features; 00560 00561 //Get the parts 00562 int numParts; 00563 if (fread(&numParts, sizeof(int), 1, fp) != 1) 00564 LFATAL("Invalid model file"); 00565 LINFO("Reading component %i number of parts %i", c, numParts); 00566 modelComponent.parts.resize(numParts); 00567 for(int p=0; p<numParts; p++) 00568 { 00569 //Get the anchor 00570 double anchor[3]; 00571 if(fread(anchor, sizeof(double), 3, fp) != 3) 00572 LFATAL("Invalid model file"); 00573 modelComponent.parts[p].anchor = Point2D<float>(anchor[0], anchor[1]); 00574 modelComponent.parts[p].scale = anchor[2]; 00575 00576 //get the deformation 00577 double deformation[4]; 00578 if(fread(deformation, sizeof(double), 4, fp) != 4) 00579 LFATAL("Invalid model file"); 00580 modelComponent.parts[p].deformation = 00581 std::vector<double>(deformation, deformation + 4); 00582 00583 //Get the features 00584 int filterDims[3]; 00585 if(fread(filterDims, sizeof(int), 3, fp) != 3) 00586 LFATAL("Invalid model file"); 00587 int width = filterDims[0]; 00588 int height = filterDims[1]; 00589 int numFeatures = filterDims[2]; 00590 00591 ImageSet<double> features; 00592 for(int feature=0; feature<numFeatures; feature++) 00593 { 00594 Image<double> featureMap(width, height, NO_INIT); 00595 if (fread(featureMap.getArrayPtr(), sizeof(double), width*height, fp) != (uint)(width*height)) 00596 LFATAL("Invalid model file"); 00597 features.push_back(featureMap); 00598 } 00599 00600 modelComponent.parts[p].features = features; 00601 } 00602 00603 itsModel.components.push_back(modelComponent); 00604 } 00605 fclose(fp); 00606 00607 } 00608 00609 00610 // ###################################################################### 00611 /* So things look consistent in everyone's emacs... */ 00612 /* Local Variables: */ 00613 /* indent-tabs-mode: nil */ 00614 /* End: */