00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #include "Component/OptionManager.H"
00041 #include "Image/ColorOps.H"
00042 #include "Image/FilterOps.H"
00043 #include "Image/MathOps.H"
00044 #include "Image/Kernels.H"
00045 #include "Image/Convolutions.H"
00046 #include "GUI/DebugWin.H"
00047 #include "ObjRec/ObjRecSPM.H"
00048 #include "SIFT/FeatureVector.H"
00049
00050
00051
00052 ObjRecSPM::ObjRecSPM(OptionManager& mgr, const std::string& descrName,
00053 const std::string& tagName) :
00054 ModelComponent(mgr, descrName, tagName),
00055 itsNumOri(4),
00056 itsNumScales(2),
00057 itsNumOriArray(36),
00058 itsObjects(0),
00059 itsUseSaliency(false)
00060 {
00061
00062 float stdmin = 1.75F;
00063 float stdstep = 0.5F;
00064 int fsmin = 3;
00065 int fsstep = 1;
00066
00067
00068 for(int scale = 0; scale < itsNumScales; scale++)
00069 for(int ori = 0; ori < itsNumOri; ori++)
00070 {
00071
00072 Image<float> filter = dogFilter<float>(stdmin + stdstep * scale,
00073 (float)ori * 180.0F / (float)itsNumOri,
00074 fsmin + fsstep * scale);
00075
00076
00077
00078 filter -= mean(filter);
00079
00080
00081 filter /= sum(squared(filter));
00082
00083 itsFilters.push_back(filter);
00084 }
00085
00086 if (itsUseSaliency)
00087 {
00088 itsGetSaliency = nub::soft_ref<GetSaliency>(new GetSaliency(mgr));
00089 addSubComponent(itsGetSaliency);
00090 }
00091
00092 }
00093
00094 void ObjRecSPM::start2()
00095 {
00096
00097
00098 }
00099
00100 ObjRecSPM::~ObjRecSPM()
00101 {
00102 }
00103
00104
00105 void ObjRecSPM::train(const Image<PixRGB<byte> > &img, const std::string label)
00106 {
00107
00108 Image<float> input = luminance(img);
00109
00110 if (itsUseSaliency)
00111 {
00112 Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
00113 itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
00114 Image<float> smap = itsGetSaliency->getSalmap();
00115 smap = rescale(smap, img.getDims());
00116 inplaceNormalize(smap, 0.0F, 1.0F);
00117 input *= smap;
00118 }
00119
00120
00121 Descriptor desc = extractSiftFeatures(input);
00122
00123 uint objId = getObject(label);
00124
00125 itsObjects[objId].model.push_back(desc);
00126
00127 }
00128
00129 void ObjRecSPM::finalizeTraining()
00130 {
00131 LINFO("Training done");
00132
00133 for(uint i=0; i<itsObjects.size(); i++)
00134 for(uint j=0; j<itsObjects[i].model.size(); j++)
00135 {
00136 printf("Obj %u model %u size %"ZU,
00137 i, j, itsObjects[i].model[j].featureLevelHist.size());
00138
00139 }
00140
00141
00142 }
00143
00144
00145 uint ObjRecSPM::getObject(const std::string name)
00146 {
00147
00148
00149 uint i=0;
00150 for(i=0; i<itsObjects.size(); i++)
00151 if (itsObjects[i].name == name)
00152 return i;
00153
00154
00155 Object obj;
00156 obj.id = i;
00157 obj.name = name;
00158 obj.model.clear();
00159
00160 itsObjects.push_back(obj);
00161
00162 return i;
00163 }
00164
00165
00166 ObjRecSPM::Descriptor ObjRecSPM::extractFeatures(const Image<float> &input)
00167 {
00168
00169 double normSum = 0;
00170
00171
00172 ImageSet<float> featuresValues(itsFilters.size());
00173 for(uint i=0; i<itsFilters.size(); i++)
00174 {
00175 Image<float> tmp = convolve(input, itsFilters[i], CONV_BOUNDARY_CLEAN);
00176
00177 tmp = abs(tmp);
00178 normSum += sum(tmp);
00179 featuresValues[i] = tmp;
00180 }
00181
00182
00183
00184 Descriptor desc;
00185 for(uint feature=0; feature<featuresValues.size(); feature++)
00186 {
00187 Image<float> featureVal = featuresValues[feature];
00188
00189 std::vector<Histogram> levelHists;
00190 for(int level = 0; level < 4; level++)
00191 {
00192 int levelSize = 1<<level;
00193 Histogram hist(levelSize*levelSize);
00194
00195 int xSpace = (featureVal.getWidth()/levelSize)+1;
00196 int ySpace = (featureVal.getHeight()/levelSize)+1;
00197
00198 for(int y=0; y<featureVal.getHeight(); y++)
00199 for(int x=0; x<featureVal.getWidth(); x++)
00200 {
00201 int binPos = (int)(x/xSpace + 2*(y/ySpace));
00202 hist.addValue(binPos, featureVal.getVal(x,y));
00203 }
00204 hist.normalize(normSum);
00205 levelHists.push_back(hist);
00206 }
00207 desc.featureLevelHist.push_back(levelHists);
00208 }
00209
00210 return desc;
00211 }
00212
00213 ObjRecSPM::Descriptor ObjRecSPM::extractSiftFeatures(const Image<float> &input)
00214 {
00215
00216 SHOWIMG(input);
00217 Descriptor desc;
00218 for(int y=10; y<input.getHeight()-10; y+=10)
00219 for(int x=10; x<input.getWidth()-10; x+=10)
00220 {
00221 SiftKeypoint kp;
00222 kp.x = x;
00223 kp.y = y;
00224 kp.fv = getSiftDescriptor(input, x,y,2);
00225 desc.siftDescriptors.push_back(kp);
00226 LINFO("%ix%i", x, y);
00227
00228
00229
00230
00231
00232
00233 }
00234
00235
00236 return desc;
00237 }
00238
00239 std::string ObjRecSPM::predict(const Image<PixRGB<byte> > &img)
00240 {
00241
00242 Image<float> input = luminance(img);
00243
00244 if (itsUseSaliency)
00245 {
00246 Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
00247 itsGetSaliency->compute(inputImg, SimTime::MSECS(3.0));
00248 Image<float> smap = itsGetSaliency->getSalmap();
00249 smap = rescale(smap, img.getDims());
00250 inplaceNormalize(smap, 0.0F, 1.0F);
00251 input *= smap;
00252 }
00253
00254
00255 Descriptor desc = extractSiftFeatures(input);
00256
00257
00258
00259
00260
00261 for(uint kp_i=0; kp_i<desc.siftDescriptors.size(); kp_i++)
00262 {
00263 SiftKeypoint kp = desc.siftDescriptors[kp_i];
00264
00265 for(uint fv_i=0; fv_i<kp.fv.size(); fv_i++)
00266 {
00267 std::vector<byte> fv = kp.fv[fv_i];
00268 FeatureVector tmpFv;
00269
00270 SHOWIMG(tmpFv.getFeatureVectorImage(fv));
00271 }
00272 }
00273
00274
00275
00276
00277
00278
00279 int objId = findObject(desc);
00280
00281 if (objId != -1)
00282 return itsObjects[objId].name;
00283
00284 return std::string("Unknown");
00285 }
00286
00287
00288 int ObjRecSPM::findObject(const Descriptor &desc)
00289 {
00290 int objId = -1;
00291
00292 double minDist = std::numeric_limits<double>::max();
00293
00294 for(uint i=0; i<itsObjects.size(); i++)
00295 {
00296
00297 for(uint j=0; j<itsObjects[i].model.size(); j++)
00298 {
00299 double dist = matchDescriptor(itsObjects[i].model[j], desc);
00300 if (dist < minDist)
00301 {
00302 minDist = dist;
00303 objId = i;
00304 }
00305 }
00306
00307 }
00308
00309 return objId;
00310
00311 }
00312
00313 double ObjRecSPM::matchDescriptor(const Descriptor &descA, const Descriptor &descB)
00314 {
00315
00316 double sum = 0;
00317 for(uint feature=0; feature<descA.featureLevelHist.size(); feature++)
00318 {
00319 sum += matchKernel(descA.featureLevelHist[feature], descB.featureLevelHist[feature]);
00320 }
00321
00322 return sum;
00323
00324
00325
00326 }
00327
00328 double ObjRecSPM::matchKernel(const std::vector<Histogram>& A, const std::vector<Histogram>& B)
00329 {
00330
00331
00332 if (B.size() > A.size())
00333 LFATAL("Incompatibale histograms");
00334 double dist = 0;
00335
00336
00337 for(uint level=0; level<A.size(); level++)
00338 {
00339 Histogram modelHist = A[level];
00340 Histogram testHist = B[level];
00341
00342 double weight;
00343 if (level == 0)
00344 weight = 1.0F/(double)(1<<(A.size()));
00345 else
00346 weight = 1.0F/(double)(1<<(A.size()-level+1));
00347 dist += weight*modelHist.getDistance(testHist);
00348
00349 }
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 return dist;
00367
00368 }
00369
00370 std::vector<std::vector<byte> > ObjRecSPM::getSiftDescriptor(const Image<float> &lum,
00371 const float x, const float y, const float s)
00372 {
00373
00374 Image<float> mag, ori;
00375 gradientSobel(lum, mag, ori, 3);
00376
00377 Histogram OV(36);
00378
00379
00380 calculateOrientationVector(x, y, s, mag, ori, OV);
00381
00382
00383
00384
00385
00386 return createVectorsAndKeypoints(x, y, s, mag, ori, OV);
00387
00388 }
00389
00390 void ObjRecSPM::calculateOrientationVector(const float x, const float y, const float s,
00391 const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV) {
00392
00393
00394
00395
00396 const float sigma = s;
00397
00398 const float sig = 1.5F * sigma, inv2sig2 = - 0.5F / (sig * sig);
00399 const int dimX = gradmag.getWidth(), dimY = gradmag.getHeight();
00400
00401 const int xi = int(x + 0.5f);
00402 const int yi = int(y + 0.5f);
00403
00404 const int rad = int(3.0F * sig);
00405 const int rad2 = rad * rad;
00406
00407
00408
00409 int starty = yi - rad; if (starty < 0) starty = 0;
00410 int stopy = yi + rad; if (stopy >= dimY) stopy = dimY-1;
00411
00412
00413 for (int ind_y = starty; ind_y <= stopy; ind_y ++)
00414 {
00415
00416
00417 const int yoff = ind_y - yi;
00418 const int bound = int(sqrtf(float(rad2 - yoff*yoff)) + 0.5F);
00419 int startx = xi - bound; if (startx < 0) startx = 0;
00420 int stopx = xi + bound; if (stopx >= dimX) stopx = dimX-1;
00421
00422 for (int ind_x = startx; ind_x <= stopx; ind_x ++)
00423 {
00424 const float dx = float(ind_x) - x, dy = float(ind_y) - y;
00425 const float distSq = dx * dx + dy * dy;
00426
00427
00428 const float gradVal = gradmag.getVal(ind_x, ind_y);
00429
00430
00431 const float gaussianWeight = expf(distSq * inv2sig2);
00432
00433
00434
00435 float angle = gradorie.getVal(ind_x, ind_y) + M_PI;
00436
00437
00438 angle = 0.5F * angle * itsNumOriArray / M_PI;
00439 while (angle < 0.0F) angle += itsNumOriArray;
00440 while (angle >= itsNumOriArray) angle -= itsNumOriArray;
00441
00442 OV.addValueInterp(angle, gaussianWeight * gradVal);
00443 }
00444 }
00445
00446
00447
00448 for (int i = 0; i < 3; i++) OV.smooth();
00449 }
00450
00451
00452
00453
00454 std::vector<std::vector<byte> > ObjRecSPM::createVectorsAndKeypoints(const float x,
00455 const float y, const float s,
00456 const Image<float>& gradmag, const Image<float>& gradorie, Histogram& OV)
00457 {
00458
00459 const float sigma = s;
00460
00461
00462 float maxPeakValue = OV.findMax();
00463
00464 const int xi = int(x + 0.5f);
00465 const int yi = int(y + 0.5f);
00466
00467 uint numkp = 0;
00468
00469 std::vector<std::vector<byte> > descriptor;
00470
00471
00472
00473 for (int bin = 0; bin < itsNumOriArray; bin++)
00474 {
00475
00476 const float midval = OV.getValue(bin);
00477
00478
00479 if (midval < 0.8F * maxPeakValue) continue;
00480
00481
00482 const float leftval = OV.getValue((bin == 0) ? itsNumOriArray-1 : bin-1);
00483
00484
00485 const float rightval = OV.getValue((bin == itsNumOriArray-1) ? 0 : bin+1);
00486
00487
00488 if (leftval > midval) continue;
00489 if (rightval > midval) continue;
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500 const float a = 0.5f * (leftval + rightval) - midval;
00501 const float b = 0.5f * (rightval - leftval);
00502 float realangle = float(bin) - 0.5F * b / a;
00503
00504 realangle *= 2.0F * M_PI / itsNumOriArray;
00505 realangle -= M_PI;
00506
00507
00508
00509
00510 FeatureVector fv;
00511
00512 const float sinAngle = sin(realangle), cosAngle = cos(realangle);
00513
00514
00515 const int radius = int(5.0F * sigma + 0.5F);
00516 const float gausssig = float(radius);
00517 const float gaussfac = - 0.5F / (gausssig * gausssig);
00518
00519
00520
00521
00522
00523
00524
00525
00526 int scale = abs(int(s));
00527 scale = scale > 5 ? 5 : scale;
00528
00529 for (int ry = -radius; ry <= radius; ry++)
00530 for (int rx = -radius; rx <= radius; rx++)
00531 {
00532
00533 const float newX = rx * cosAngle - ry * sinAngle;
00534 const float newY = rx * sinAngle + ry * cosAngle;
00535
00536
00537 const float orgX = newX + float(xi);
00538 const float orgY = newY + float(yi);
00539
00540
00541 if (gradmag.coordsOk(orgX, orgY) == false) continue;
00542
00543
00544
00545 const float xf = 2.0F + 2.0F * float(rx) / float(radius);
00546 const float yf = 2.0F + 2.0F * float(ry) / float(radius);
00547
00548
00549
00550
00551 const float gaussFactor = expf((newX*newX+newY*newY) * gaussfac);
00552 const float weightedMagnitude =
00553 gaussFactor * gradmag.getValInterp(orgX, orgY);
00554
00555
00556
00557 float gradAng = gradorie.getValInterp(orgX, orgY) - realangle;
00558
00559 gradAng=fmod(gradAng, 2*M_PI);
00560
00561
00562 if (gradAng < 0.0) gradAng+=2*M_PI;
00563 if (gradAng >= M_PI) gradAng-=2*M_PI;
00564
00565 const float orient = (gradAng + M_PI) * 8 / (2 * M_PI);
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 fv.addValue(xf, yf, orient, weightedMagnitude);
00577
00578 }
00579
00580
00581 std::vector<byte> oriVec;
00582
00583 fv.toByteKey(oriVec);
00584
00585 double mag = fv.getMag();
00586 if (oriVec.size() > 0 && mag > 0)
00587 descriptor.push_back(oriVec);
00588
00589
00590 ++ numkp;
00591
00592 }
00593 return descriptor;
00594 }
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604