00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #include "FeatureMatching/DPM.H"
00040 #include "Image/DrawOps.H"
00041 #include "Image/MathOps.H"
00042 #include "Image/Kernels.H"
00043 #include "Image/CutPaste.H"
00044 #include "Image/ColorOps.H"
00045 #include "Image/FilterOps.H"
00046 #include "Image/ShapeOps.H"
00047 #include "SIFT/FeatureVector.H"
00048 #include "GUI/DebugWin.H"
00049 #include "Image/Layout.H"
00050 #include "Image/Convolutions.H"
00051
00052
00053
00054
00055 DPM::DPM() :
00056 itsInterval(10)
00057 {
00058 itsThreadServer.reset(new WorkThreadServer("DPM", 10));
00059 }
00060
00061
00062 DPM::~DPM()
00063 {
00064 }
00065
00066
00067 void DPM::computeFeaturePyramid(const Image<PixRGB<byte> >& img)
00068 {
00069 int numBin = 8;
00070
00071 int width = img.getWidth();
00072 int height = img.getHeight();
00073 double sc = pow(2,1.0F/itsInterval);
00074 int maxScale = 1 + floor(log(std::min(width,height)/(5*numBin))/log(sc));
00075
00076 HOG hog;
00077
00078 itsFeaturesPyramid.clear();
00079 itsFeaturesPyramid.resize(maxScale + itsInterval);
00080
00081
00082 for(int i=0; i<itsInterval; i++)
00083 {
00084 double scale = 1.0/pow(sc,i);
00085 int width = (int)round((float)img.getWidth()*scale);
00086 int height = (int)round((float)img.getHeight()*scale);
00087 Image<PixRGB<byte> > scaled = rescale(img, width, height);
00088
00089
00090 itsFeaturesPyramid[i].features = hog.getFeatures(scaled, numBin/2);
00091 itsFeaturesPyramid[i].scale = 2*scale;
00092 itsFeaturesPyramid[i].bins = numBin/2;
00093
00094
00095 itsFeaturesPyramid[i+itsInterval].features = hog.getFeatures(scaled, numBin);
00096 itsFeaturesPyramid[i+itsInterval].scale = scale;
00097 itsFeaturesPyramid[i+itsInterval].bins = numBin;
00098
00099
00100 for(int j= i+itsInterval; j < maxScale; j+=itsInterval)
00101 {
00102 Image<PixRGB<byte> > scaled2 = rescale(scaled,
00103 scaled.getWidth()/2, scaled.getHeight()/2);
00104 scaled = scaled2;
00105
00106 itsFeaturesPyramid[j+itsInterval].features = hog.getFeatures(scaled, numBin);
00107 itsFeaturesPyramid[j+itsInterval].scale = 0.5*itsFeaturesPyramid[j].scale;
00108 itsFeaturesPyramid[j+itsInterval].bins = numBin;
00109 }
00110 }
00111
00112
00113 int xpadd = 11;
00114 int ypadd = 6;
00115
00116 for(uint i=0; i<itsFeaturesPyramid.size(); i++)
00117 {
00118 ImageSet<double>& feature = itsFeaturesPyramid[i].features;
00119 Dims dims(feature[0].getWidth() + (xpadd+1)*2,
00120 feature[0].getHeight() + (ypadd+1)*2);
00121 ImageSet<double> newFeature(feature.size(), dims, ZEROS);
00122 for(uint f=0; f<newFeature.size(); f++)
00123 inplacePaste(newFeature[f], feature[f], Point2D<int>(xpadd+1, ypadd+1));
00124
00125
00126 int fID = 31;
00127 for(int y=0; y<ypadd+1; y++)
00128 for(int x=0; x<newFeature[fID].getWidth(); x++)
00129 newFeature[fID].setVal(x,y, 1);
00130
00131 for(int y=newFeature[fID].getHeight()-ypadd; y<newFeature[fID].getHeight(); y++)
00132 for(int x=0; x<newFeature[fID].getWidth(); x++)
00133 newFeature[fID].setVal(x,y, 1);
00134
00135 for(int y=ypadd+1; y < newFeature[fID].getHeight()-ypadd; y++)
00136 {
00137 for(int x=0; x<xpadd+1; x++)
00138 newFeature[fID].setVal(x,y, 1);
00139 for(int x=newFeature[fID].getWidth()-xpadd; x< newFeature[fID].getWidth(); x++)
00140 newFeature[fID].setVal(x,y, 1);
00141 }
00142
00143 itsFeaturesPyramid[i].features = newFeature;
00144
00145
00146 }
00147
00148 }
00149
00150 void DPM::convolveModel()
00151 {
00152
00153 itsModelScores.clear();
00154 for(uint level=itsInterval; level<itsFeaturesPyramid.size(); level++)
00155 {
00156 LINFO("Level %i", level);
00157
00158 std::vector<rutz::shared_ptr<DPMJob> > itsJobs;
00159
00160 for(size_t c=0; c<itsModel.components.size(); c++)
00161 {
00162 itsJobs.push_back(rutz::make_shared(new DPMJob(this, c, level)));
00163 itsThreadServer->enqueueJob(itsJobs.back());
00164 }
00165
00166
00167 for(size_t i=0; i<itsJobs.size(); i++)
00168 itsJobs[i]->wait();
00169
00170
00171 Image<double> maxScore;
00172 Image<int> maxComp;
00173
00174 for(size_t i=0; i<itsJobs.size(); i++)
00175 {
00176 Image<double> score = itsJobs[i]->getScore();
00177
00178 if (maxScore.initialized())
00179 {
00180
00181
00182 int w = std::min(score.getWidth(), maxScore.getWidth());
00183 int h = std::min(score.getHeight(), maxScore.getHeight());
00184
00185 int maxScoreWidth = maxScore.getWidth();
00186 int scoreWidth = score.getWidth();
00187
00188 Image<double>::const_iterator scorePtr = score.begin();
00189 Image<double>::iterator maxScorePtr = maxScore.beginw();
00190 for(int y=0; y<h; y++)
00191 for(int x=0; x<w; x++)
00192 if (scorePtr[y*scoreWidth + x] > maxScorePtr[y*maxScoreWidth + x])
00193 {
00194 maxScorePtr[y*maxScoreWidth + x] = scorePtr[y*scoreWidth + x];
00195 maxComp[y*maxScoreWidth + x] = itsJobs[i]->getComponent();
00196 }
00197 } else {
00198 maxScore = score;
00199 maxComp = Image<int>(maxScore.getDims(), NO_INIT);
00200 maxComp.clear(-1);
00201 }
00202 }
00203 itsModelScores.push_back(ModelScore(maxScore, maxComp, level));
00204 }
00205
00206 }
00207
00208 Image<double> DPM::convolveComponent(const int comp, const int level)
00209 {
00210 ImageSet<double>& imgFeatures = itsFeaturesPyramid[level].features;
00211
00212
00213 ModelComponent& component = itsModel.components[comp];
00214 ImageSet<double>& rootFeatures = component.rootFilter;
00215 Image<double> score = convolveFeatures(imgFeatures, rootFeatures);
00216
00217 std::vector<double> deformation(4);
00218 deformation[0] = 1000;
00219 deformation[1] = 0;
00220 deformation[2] = 1000;
00221 deformation[3] = 0;
00222
00223 Image<double> scoreDef = distanceTrans(score, deformation);
00224 score = scoreDef;
00225 int scoreW = score.getWidth();
00226 int scoreH = score.getHeight();
00227
00228 score += component.offset;
00229
00230
00231 for(size_t p=0; p<component.parts.size(); p++)
00232 {
00233 ImageSet<double>& partImgFeatures = itsFeaturesPyramid[level-itsInterval].features;
00234 ModelPart& part = component.parts[p];
00235 ImageSet<double>& partFeatures = part.features;
00236 Image<double> partScore = convolveFeatures(partImgFeatures, partFeatures);
00237
00238
00239 Image<double> defScore = distanceTrans(partScore, part.deformation);
00240 Point2D<float> anchor = part.anchor + Point2D<int>(1,1) - Point2D<int>(11,6);
00241
00242 int defScoreW = defScore.getWidth();
00243 int defScoreH = defScore.getHeight();
00244
00245 for(int y=0; y<scoreH; y++)
00246 for(int x=0; x<scoreW; x++)
00247 {
00248 int px = anchor.i + x*2;
00249 int py = anchor.j + y*2;
00250 if (px > 0 && px < defScoreW &&
00251 py > 0 && py < defScoreH)
00252 score[y*scoreW + x] += defScore[py*defScoreW + px];
00253 }
00254
00255 }
00256
00257 return score;
00258 }
00259
00260 std::vector<DPM::Detection> DPM::getBoundingBoxes(const float thresh)
00261 {
00262 std::vector<Detection> detections;
00263
00264 for(uint i=0; i<itsModelScores.size(); i++)
00265 {
00266
00267 Image<double>::const_iterator scorePtr = itsModelScores[i].score.begin();
00268 const int w = itsModelScores[i].score.getWidth();
00269 const int h = itsModelScores[i].score.getHeight();
00270
00271 for(int y=0; y<h; y++)
00272 for(int x=0; x<w; x++)
00273 {
00274 if (scorePtr[y*w+x] > thresh)
00275 {
00276 int level = itsModelScores[i].level;
00277 float scale = (float)itsFeaturesPyramid[level].bins/itsFeaturesPyramid[level].scale;
00278 int comp = itsModelScores[i].component.getVal(x,y);
00279 int paddX = 11;
00280 int paddY = 6;
00281 ModelComponent& component = itsModel.components[comp];
00282 ImageSet<double>& rootFeatures = component.rootFilter;
00283 Dims size = rootFeatures[0].getDims();
00284 int x1 = (x - paddX)*scale+1;
00285 int y1 = (y - paddY)*scale+1;
00286
00287 int x2 = x1 + size.w()*scale - 1;
00288 int y2 = y1 + size.h()*scale - 1;
00289
00290 Rectangle rect = Rectangle::tlbrI(y1,x1, y2, x2);
00291 detections.push_back(Detection(rect, scorePtr[y*w+x], comp));
00292 }
00293 }
00294 }
00295
00296 return detections;
00297
00298 }
00299
00300 std::vector<DPM::Detection> DPM::filterDetections(const std::vector<Detection>& detections, const float overlap)
00301 {
00302
00303 std::vector<Detection> filteredDetections;
00304
00305
00306
00307
00308
00309
00310 for(uint i=0; i<detections.size(); i++)
00311 {
00312
00313 bool isOverlap = false;
00314 for(uint j=0; j<filteredDetections.size(); j++)
00315 {
00316 if (detections[i].bb.getOverlapRatio(filteredDetections[j].bb) > overlap)
00317 {
00318 isOverlap = true;
00319
00320 if (detections[i].score > filteredDetections[j].score)
00321 filteredDetections[j] = detections[i];
00322 }
00323 }
00324
00325 if (!isOverlap)
00326 filteredDetections.push_back(detections[i]);
00327 }
00328
00329 return filteredDetections;
00330
00331 }
00332
00333 void DPM::dtHelper(const Image<double>::const_iterator src,
00334 Image<double>::iterator dst,
00335 Image<int>::iterator ptr,
00336 int step,
00337 int s1, int s2, int d1, int d2,
00338 double a, double b)
00339 {
00340
00341 if (d2 >= d1)
00342 {
00343 int d = (d1+d2) >> 1;
00344 int s = s1;
00345
00346 for (int p = s1+1; p <= s2; p++)
00347 {
00348 if (src[s*step] - a*squareOf(d-s) - b*(d-s) <
00349 src[p*step] - a*squareOf(d-p) - b*(d-p))
00350 s = p;
00351 }
00352 dst[d*step] = src[s*step] - a*squareOf(d-s) - b*(d-s);
00353 ptr[d*step] = s;
00354
00355
00356 dtHelper(src, dst, ptr, step, s1, s, d1, d-1, a, b);
00357 dtHelper(src, dst, ptr, step, s, s2, d+1, d2, a, b);
00358 }
00359 }
00360
00361 Image<double> DPM::distanceTrans(const Image<double>& score,
00362 const std::vector<double>& deformation)
00363 {
00364 double ax = deformation[0];
00365 double bx = deformation[1];
00366 double ay = deformation[2];
00367 double by = deformation[3];
00368
00369 Image<double> defScore(score.getDims(), ZEROS);
00370 Image<int> scoreIx(score.getDims(), ZEROS);
00371 Image<int> scoreIy(score.getDims(), ZEROS);
00372 Image<int>::iterator ptrIx = scoreIx.beginw();
00373 Image<int>::iterator ptrIy = scoreIy.beginw();
00374
00375 Image<double> tmpM(score.getDims(), ZEROS);
00376 Image<int> tmpIx(score.getDims(), ZEROS);
00377 Image<int> tmpIy(score.getDims(), ZEROS);
00378
00379 const Image<double>::const_iterator src = score.begin();
00380 Image<double>::iterator dst = defScore.beginw();
00381 Image<double>::iterator ptrTmpM = tmpM.beginw();
00382 Image<int>::iterator ptrTmpIx = tmpIx.beginw();
00383 Image<int>::iterator ptrTmpIy = tmpIy.beginw();
00384
00385 int w = score.getWidth();
00386 int h = score.getHeight();
00387
00388
00389 for(int x=0; x<w; x++)
00390 dtHelper(src+x, ptrTmpM+x, ptrTmpIx+x, w,
00391 0, h-1, 0, h-1, ay, by);
00392
00393 for(int y=0; y<h; y++)
00394 dtHelper(ptrTmpM+y*w, dst+y*w, ptrTmpIy+y*w, 1,
00395 0, w-1, 0, w-1, ax, bx);
00396
00397 for(int y=0; y<h; y++)
00398 for(int x=0; x<w; x++)
00399 {
00400 int p = y*w+x;
00401 ptrIy[p] = ptrTmpIy[p];
00402 ptrIx[p] = ptrTmpIx[y*w+ptrTmpIy[p]];
00403 }
00404
00405
00406 return defScore;
00407
00408 }
00409
00410 Image<double> DPM::convolveFeatures(const ImageSet<double>& imgFeatures,
00411 const ImageSet<double>& filterFeatures)
00412 {
00413 if (imgFeatures.size() == 0)
00414 return Image<double>();
00415
00416 ASSERT(imgFeatures.size() == filterFeatures.size());
00417
00418
00419 int w = imgFeatures[0].getWidth() - filterFeatures[0].getWidth() + 1;
00420 int h = imgFeatures[0].getHeight() - filterFeatures[0].getHeight() + 1;
00421
00422 int filtWidth = filterFeatures[0].getWidth();
00423 int filtHeight = filterFeatures[0].getHeight();
00424 int srcWidth = imgFeatures[0].getWidth();
00425
00426 Image<double> score(w,h, ZEROS);
00427
00428 for(uint i=0; i<imgFeatures.size(); i++)
00429 {
00430 Image<double>::const_iterator srcPtr = imgFeatures[i].begin();
00431 Image<double>::const_iterator filtPtr = filterFeatures[i].begin();
00432 Image<double>::iterator dstPtr = score.beginw();
00433
00434 for(int y=0; y<h; y++)
00435 for(int x=0; x<w; x++)
00436 {
00437
00438 double val = 0;
00439 for(int yp = 0; yp < filtHeight; yp++)
00440 for(int xp = 0; xp < filtWidth; xp++)
00441 {
00442 val += srcPtr[(y+yp)*srcWidth + (x+xp)] * filtPtr[yp*filtWidth + xp];
00443 }
00444
00445 *(dstPtr++) += val;
00446 }
00447 }
00448
00449 return score;
00450 }
00451
00452 Image<PixRGB<byte> > DPM::getModelImage()
00453 {
00454 int lineLength = 20;
00455
00456 Layout<PixRGB<byte> > modelImg;
00457
00458 HOG hog;
00459 for(size_t c=0; c<itsModel.components.size(); c++)
00460 {
00461
00462 ModelComponent& component = itsModel.components[c];
00463 Image<PixRGB<byte> > compImage =
00464 hog.getHistogramImage(component.rootFilter);
00465 compImage = rescale(compImage, compImage.getDims()*2);
00466 Image<PixRGB<byte> > partsImage = compImage;
00467 Image<PixRGB<byte> > defImage(compImage.getDims(), ZEROS);
00468
00469
00470 for(size_t p=0; p<component.parts.size(); p++)
00471 {
00472 ModelPart& part = component.parts[p];
00473 Image<PixRGB<byte> > partImg =
00474 hog.getHistogramImage(part.features);
00475
00476
00477 Point2D<int> topLeft = Point2D<int>(part.anchor*lineLength);
00478 inplacePaste(partsImage, partImg, topLeft);
00479
00480 drawRect(partsImage, Rectangle(topLeft, partImg.getDims()),
00481 PixRGB<byte>(255,0,0));
00482
00483
00484 Image<double> defImg(partImg.getDims(), ZEROS);
00485
00486 float defScale = 500;
00487 for(int y=0; y<defImg.getHeight(); y++)
00488 for(int x=0; x<defImg.getWidth(); x++)
00489 {
00490 double px = (double)((defImg.getWidth()/2) - x)/20.0;
00491 double py = (double)((defImg.getHeight()/2) - y)/20.0;
00492
00493 double val = px*px * part.deformation[0] +
00494 px * part.deformation[1] +
00495 py*py * part.deformation[2] +
00496 py * part.deformation[3];
00497 defImg.setVal(x,y,val*defScale);
00498
00499 }
00500 inplacePaste(defImage, toRGB((Image<byte>)defImg), topLeft);
00501
00502 drawRect(defImage, Rectangle(topLeft, partImg.getDims()),
00503 PixRGB<byte>(255,0,0));
00504
00505 }
00506 Layout<PixRGB<byte> > compDisp = hcat(compImage, partsImage);
00507 compDisp = hcat(compDisp, defImage);
00508
00509 modelImg = vcat(modelImg, compDisp);
00510 }
00511
00512 return modelImg.render();
00513
00514 }
00515
00516 void DPM::readModel(const char* fileName)
00517 {
00518 FILE *fp = fopen(fileName, "rb");
00519 if (fp == NULL)
00520 LFATAL("Can not open model file (%s)", fileName);
00521
00522 LINFO("Reading model from %s", fileName);
00523
00524 int numComponents;
00525 if (fread(&numComponents, sizeof(int), 1, fp) != 1)
00526 LFATAL("Invalid model file");
00527 LINFO("Num Components %i", numComponents);
00528
00529 itsModel = Model();
00530
00531 for(int c=0; c<numComponents; c++)
00532 {
00533 ModelComponent modelComponent;
00534
00535
00536 int filterDims[3];
00537 if(fread(filterDims, sizeof(int), 3, fp) != 3)
00538 LFATAL("Invalid model file");
00539 int width = filterDims[1];
00540 int height = filterDims[0];
00541 int numFeatures = filterDims[2];
00542
00543 ImageSet<double> features;
00544 for(int feature=0; feature<numFeatures; feature++)
00545 {
00546 Image<double> featureMap(width, height, NO_INIT);
00547 if (fread(featureMap.getArrayPtr(), sizeof(double), width*height, fp) != (uint)(width*height))
00548 LFATAL("Invalid model file");
00549 features.push_back(featureMap);
00550 }
00551
00552
00553 double offset = 0;
00554 if (fread(&offset, sizeof(double), 1, fp) != 1)
00555 LFATAL("Invalid model file");
00556 modelComponent.offset = offset;
00557
00558
00559 modelComponent.rootFilter = features;
00560
00561
00562 int numParts;
00563 if (fread(&numParts, sizeof(int), 1, fp) != 1)
00564 LFATAL("Invalid model file");
00565 LINFO("Reading component %i number of parts %i", c, numParts);
00566 modelComponent.parts.resize(numParts);
00567 for(int p=0; p<numParts; p++)
00568 {
00569
00570 double anchor[3];
00571 if(fread(anchor, sizeof(double), 3, fp) != 3)
00572 LFATAL("Invalid model file");
00573 modelComponent.parts[p].anchor = Point2D<float>(anchor[0], anchor[1]);
00574 modelComponent.parts[p].scale = anchor[2];
00575
00576
00577 double deformation[4];
00578 if(fread(deformation, sizeof(double), 4, fp) != 4)
00579 LFATAL("Invalid model file");
00580 modelComponent.parts[p].deformation =
00581 std::vector<double>(deformation, deformation + 4);
00582
00583
00584 int filterDims[3];
00585 if(fread(filterDims, sizeof(int), 3, fp) != 3)
00586 LFATAL("Invalid model file");
00587 int width = filterDims[0];
00588 int height = filterDims[1];
00589 int numFeatures = filterDims[2];
00590
00591 ImageSet<double> features;
00592 for(int feature=0; feature<numFeatures; feature++)
00593 {
00594 Image<double> featureMap(width, height, NO_INIT);
00595 if (fread(featureMap.getArrayPtr(), sizeof(double), width*height, fp) != (uint)(width*height))
00596 LFATAL("Invalid model file");
00597 features.push_back(featureMap);
00598 }
00599
00600 modelComponent.parts[p].features = features;
00601 }
00602
00603 itsModel.components.push_back(modelComponent);
00604 }
00605 fclose(fp);
00606
00607 }
00608
00609
00610
00611
00612
00613
00614