00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064 #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_LIBTORCH)
00065
00066 #include "Util/log.H"
00067
00068 int main()
00069 {
00070 LERROR("Sorry, this program needs the OpenCV and torch libraries.") ;
00071 return 255 ;
00072 }
00073
00074 #else // the actual program in all its hideous glory
00075
00076
00077
00078 #include "Image/OpenCVUtil.H"
00079
00080 #include <fstream>
00081
00082
00083 #include "Neuro/GistEstimatorBeyondBoF.H"
00084
00085
00086 #include "Neuro/StdBrain.H"
00087 #include "Neuro/NeuroOpts.H"
00088 #include "Neuro/NeuroSimEvents.H"
00089
00090 #include "Media/SimFrameSeries.H"
00091 #include "Media/MediaOpts.H"
00092
00093 #include "Simulation/SimEventQueue.H"
00094 #include "Simulation/SimEventQueueConfigurator.H"
00095
00096 #include "Channels/ChannelOpts.H"
00097 #include "Component/ModelManager.H"
00098 #include "Component/ModelOptionDef.H"
00099
00100 #include "Image/Point2D.H"
00101
00102 #include "nub/ref.h"
00103
00104
00105 #include <torch/general.h>
00106 #include <torch/QCTrainer.h>
00107 #include <torch/SVMClassification.h>
00108 #include <torch/Kernel.h>
00109 #include <torch/MatDataSet.h>
00110
00111
00112 #include <glob.h>
00113 #include <unistd.h>
00114
00115
00116 #include <sstream>
00117 #include <ios>
00118 #include <numeric>
00119 #include <algorithm>
00120 #include <functional>
00121 #include <map>
00122 #include <vector>
00123 #include <iterator>
00124 #include <stdexcept>
00125 #include <utility>
00126 #include <limits>
00127 #include <cmath>
00128
00129
00130
00131
00132
00133
00134
00135
// Renders any value that supports stream insertion as a std::string by
// writing it to an in-memory output stream.
template<typename T>
static std::string to_string(const T& t)
{
   std::ostringstream formatter ;
   formatter << t ;
   const std::string text = formatter.str() ;
   return text ;
}
00143
00144
00145
// Parses a value of type T from the string s using stream extraction.
//
// s      -- text to parse
// defval -- value returned when s cannot be parsed as a T
//
// The extraction result is checked explicitly: since C++11 a failed
// numeric extraction overwrites its target with zero, so simply returning
// the extraction target would silently discard defval.
template<typename T>
static T from_string(const std::string& s, const T& defval = T())
{
   T parsed(defval) ;
   std::istringstream str(s) ;
   if (str >> parsed)
      return parsed ;
   return defval ;
}

// Specialization for strings: the input is returned verbatim (stream
// extraction would stop at the first whitespace character) and the
// default value is never needed.
template<>
std::string from_string(const std::string& s, const std::string&)
{
   return s ;
}
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175 namespace {
00176
00177 const ModelOptionCateg MOC_BBOF = {
00178 MOC_SORTPRI_3,
00179 "Options specific to the Beyond Bag-of-Features program",
00180 } ;
00181
00182
00183
00184
00185 #ifndef BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE
00186 #define BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE "sift_descriptors.txt"
00187 #endif
00188
00189 const ModelOptionDef OPT_SiftDescriptors = {
00190 MODOPT_ARG_STRING, "SiftDescriptors", & MOC_BBOF, OPTEXP_CORE,
00191 "This option specifies the name of the file where SIFT descriptors\n"
00192 "for the training images are to be accumulated. This is a plain text\n"
00193 "file containing the descriptors that will be fed into the K-means\n"
00194 "procedure during the second training phase.\n",
00195 "sift-descriptors", '\0', "sift-descriptors-file",
00196 BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE,
00197 } ;
00198
00199
00200
00201
00202 #ifndef BBOF_DEFAULT_VOCABULARY_FILE
00203 #define BBOF_DEFAULT_VOCABULARY_FILE "sift_vocabulary.txt"
00204 #endif
00205
00206 const ModelOptionDef OPT_SiftVocabulary = {
00207 MODOPT_ARG_STRING, "SiftVocabulary", & MOC_BBOF, OPTEXP_CORE,
00208 "This option specifies the name of the file in which the \"prototypical\"\n"
00209 "SIFT descriptors are (or are to be) stored. This is a plain text\n"
00210 "file containing the centroids of the K-means clusters, which are used\n"
00211 "during gist vector computation to create feature maps and, subsequently,\n"
00212 "the multi-level histograms using the spatial matching pyramid as\n"
00213 "described in the Lazebnik paper.\n",
00214 "sift-vocabulary", '\0', "sift-vocabulary-file",
00215 BBOF_DEFAULT_VOCABULARY_FILE,
00216 } ;
00217
00218
00219
00220
00221
00222 #ifndef BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE
00223 #define BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE "training_histograms.txt"
00224 #endif
00225
00226 const ModelOptionDef OPT_HistogramsFile = {
00227 MODOPT_ARG_STRING, "HistogramsFile", & MOC_BBOF, OPTEXP_CORE,
00228 "This option specifies the name of the training histograms database,\n"
00229 "a plain text file containing one histogram entry per line. The\n"
00230 "first field specifies the name plus number of the entry (e.g.,\n"
00231 "foo.mpg:1, bar.mpg:5, and so on). The second field specifies the ground\n"
00232 "truth for this particular image. The remaining fields are simply the\n"
00233 "4200 numbers making up the image's flattened out multi-level histogram,\n"
00234 "which serves as its gist vector.\n",
00235 "training-histograms", '\0', "training-histograms-file",
00236 BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE,
00237 } ;
00238
00239
00240
00241
00242
00243
00244
00245 #ifndef BBOF_DEFAULT_SVM_CLASSIFIER_FILE
00246 #define BBOF_DEFAULT_SVM_CLASSIFIER_FILE "svm_classifier.txt"
00247 #endif
00248
00249 const ModelOptionDef OPT_SvmClassifierFile = {
00250 MODOPT_ARG_STRING, "SvmClassifierFile", & MOC_BBOF, OPTEXP_CORE,
00251 "This option specifies the name of the file that will hold the SVM\n"
00252 "classifier for a given segment. This file is read and written by the\n"
00253 "torch library.",
00254 "svm-classifier", '\0', "svm-classifier-file",
00255 BBOF_DEFAULT_SVM_CLASSIFIER_FILE,
00256 } ;
00257
00258
00259
00260
00261
00262
00263 #ifndef BBOF_DEFAULT_SVM_TEMP_FILE
00264 #define BBOF_DEFAULT_SVM_TEMP_FILE "/tmp/train-bbof-torch-dataset.txt"
00265 #endif
00266
00267 const ModelOptionDef OPT_SvmTempFile = {
00268 MODOPT_ARG_STRING, "SvmTempFile", & MOC_BBOF, OPTEXP_CORE,
00269 "This option specifies the name of the temp file that will hold the SVM\n"
00270 "training data in the format required by the torch library. This file is\n"
00271 "is automatically deleted when it is no longer required.",
00272 "svm-temp", '\0', "svm-temp-file",
00273 BBOF_DEFAULT_SVM_TEMP_FILE,
00274 } ;
00275
00276
00277
00278 #ifndef BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE
00279 #define BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE "bbof_classifications.txt"
00280 #endif
00281
00282 const ModelOptionDef OPT_ResultsFile = {
00283 MODOPT_ARG_STRING, "ResultsFile", & MOC_BBOF, OPTEXP_CORE,
00284 "This option specifies the name of the classification results file,\n"
00285 "a plain text file containing one result entry per line. The first\n"
00286 "field specifies the name of the input image plus number of the entry,\n"
00287 "(e.g., foo.mpg:1, bar.mpg:5, and so on). Then comes the ground truth\n"
00288 "for this image followed by its classification result.\n",
00289 "results-file", '\0', "classification-results-file",
00290 BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE,
00291 } ;
00292
00293
00294
00295
00296
00297
00298
00299
00300 #ifndef BBOF_DEFAULT_IMAGE_NAME
00301 #define BBOF_DEFAULT_IMAGE_NAME "some_image"
00302 #endif
00303 #ifndef BBOF_DEFAULT_SEGMENT_NUMBER
00304 #define BBOF_DEFAULT_SEGMENT_NUMBER "0"
00305 #endif
00306
00307 const ModelOptionDef OPT_ImageName = {
00308 MODOPT_ARG_STRING, "ImageName", & MOC_BBOF, OPTEXP_CORE,
00309 "This option specifies the \"root\" name for an image. The image number\n"
00310 "will be automatically appended to this \"root\" name with a colon as the\n"
00311 "separator between name and frame number. The current input MPEG file\n"
00312 "name is a good choice for the value of this option.\n",
00313 "image-name", '\0', "input-MPEG-file-name",
00314 BBOF_DEFAULT_IMAGE_NAME,
00315 } ;
00316
00317 const ModelOptionDef OPT_SegmentNumber = {
00318 MODOPT_ARG_STRING, "SegmentNumber", & MOC_BBOF, OPTEXP_CORE,
00319 "This option specifies the segment number for an image in the training\n"
00320 "set. The segment number is used to specify the ground truth for the\n"
00321 "image classification.\n",
00322 "segment-number", '\0', "image-segment-number",
00323 BBOF_DEFAULT_SEGMENT_NUMBER,
00324 } ;
00325
00326
00327
00328
00329 #ifndef BBOF_DEFAULT_GIST_VECTORS_FILE
00330 #define BBOF_DEFAULT_GIST_VECTORS_FILE "gist_vectors.txt"
00331 #endif
00332
00333 const ModelOptionDef OPT_GistVectors = {
00334 MODOPT_ARG_STRING, "GistVectors", & MOC_BBOF, OPTEXP_CORE,
00335 "This option specifies the name of the file in which gist vectors are\n"
00336 "stored. This option is used when classification must be performed\n"
00337 "using gist vectors computed by some other entity. This is useful, for\n"
00338 "example, when a client program running on some other host computes gist\n"
00339 "vectors and passes these vectors to this program for classification.\n"
00340 "In such situations, we do not need to read images and compute gist\n"
00341 "vectors for them. Instead, we bypass all that and simply perform the\n"
00342 "classification using the precomputed vectors.\n",
00343 "gist-vectors", '\0', "gist-vectors-file",
00344 BBOF_DEFAULT_GIST_VECTORS_FILE,
00345 } ;
00346
00347
00348
00349
00350
00351 #ifndef BBOF_DEFAULT_VOCABULARY_SIZE
00352 #define BBOF_DEFAULT_VOCABULARY_SIZE "200"
00353 #endif
00354
00355 const ModelOptionDef OPT_VocabularySize = {
00356 MODOPT_ARG_STRING, "VocabularySize", & MOC_BBOF, OPTEXP_CORE,
00357 "This option specifies the size of the SIFT vocabulary.\n",
00358 "vocabulary-size", '\0', "vocabulary-size",
00359 BBOF_DEFAULT_VOCABULARY_SIZE,
00360 } ;
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
// Names of the actions accepted as the program's single non-option
// argument. Each one is a key of BBoFSimulation's action map; a build may
// predefine any of these macros to rename the corresponding action.
#ifndef BBOF_SIFT_CMD
   #define BBOF_SIFT_CMD "sift"
#endif
#ifndef BBOF_VOCABULARY_CMD
   #define BBOF_VOCABULARY_CMD "vocab"
#endif
#ifndef BBOF_HISTOGRAM_CMD
   #define BBOF_HISTOGRAM_CMD "hist"
#endif
#ifndef BBOF_SVM_CMD
   #define BBOF_SVM_CMD "svm"
#endif
#ifndef BBOF_CLASSIFY_CMD
   #define BBOF_CLASSIFY_CMD "classify"
#endif
#ifndef BBOF_CLASSIFY_GV_CMD
   #define BBOF_CLASSIFY_GV_CMD "classify_gv"
#endif

// Usage string listing all actions, passed to the command line parser.
// The whitespace between each string literal and the adjacent macro name
// is required: from C++11 on, "{"BBOF_SIFT_CMD is lexed as a string
// literal with a user-defined-literal suffix instead of a literal
// followed by a macro.
#ifndef BBOF_ACTIONS
   #define BBOF_ACTIONS ("{" BBOF_SIFT_CMD "|" BBOF_VOCABULARY_CMD "|" \
                         BBOF_HISTOGRAM_CMD "|" BBOF_SVM_CMD "|" \
                         BBOF_CLASSIFY_CMD "|" BBOF_CLASSIFY_GV_CMD "}")
#endif
00455
00456 }
00457
00458
00459
00460
00461
00462
00463 namespace {
00464
00465 class BBoFSimulation {
00466 ModelManager model_manager ;
00467 nub::soft_ref<SimEventQueueConfigurator> configurator ;
00468 nub::soft_ref<StdBrain> brain ;
00469 nub::ref<SimInputFrameSeries> input_frame_series ;
00470
00471
00472 OModelParam<std::string> sd_option ;
00473 OModelParam<std::string> sv_option ;
00474 OModelParam<std::string> th_option ;
00475 OModelParam<std::string> sc_option ;
00476 OModelParam<std::string> st_option ;
00477 OModelParam<std::string> rf_option ;
00478 OModelParam<std::string> in_option ;
00479 OModelParam<std::string> sn_option ;
00480 OModelParam<std::string> gv_option ;
00481 OModelParam<std::string> vs_option ;
00482
00483 public :
00484 BBoFSimulation(const std::string& model_name) ;
00485 void parse_command_line(int argc, const char* argv[]) ;
00486 void run() ;
00487 ~BBoFSimulation() ;
00488
00489 private :
00490
00491 typedef void (BBoFSimulation::*Action)() ;
00492 typedef std::map<std::string, Action> ActionMap ;
00493 ActionMap action_map ;
00494
00495 void accumulate_sift_descriptors() ;
00496 void compute_sift_vocabulary() ;
00497 void compute_training_histograms() ;
00498 void generate_svm_classifier() ;
00499 void classify_input_images() ;
00500 void classify_using_gist_vectors() ;
00501
00502
00503 std::string sift_descriptors_file() {return sd_option.getVal() ;}
00504 std::string sift_vocabulary_file() {return sv_option.getVal() ;}
00505 std::string histograms_file() {return th_option.getVal() ;}
00506 std::string svm_classifier_file() {return sc_option.getVal() ;}
00507 std::string svm_temp_file() {return st_option.getVal() ;}
00508 std::string results_file() {return rf_option.getVal() ;}
00509 std::string image_name() {return in_option.getVal() ;}
00510 std::string segment_number() {return sn_option.getVal() ;}
00511 std::string gist_vectors_file() {return gv_option.getVal() ;}
00512 int vocabulary_size() {return from_string<int>(vs_option.getVal()) ;}
00513 } ;
00514
00515
00516
00517 BBoFSimulation::BBoFSimulation(const std::string& model_name)
00518 : model_manager(model_name),
00519 configurator(new SimEventQueueConfigurator(model_manager)),
00520 brain(new StdBrain(model_manager)),
00521 input_frame_series(new SimInputFrameSeries(model_manager)),
00522 sd_option(& OPT_SiftDescriptors, & model_manager),
00523 sv_option(& OPT_SiftVocabulary, & model_manager),
00524 th_option(& OPT_HistogramsFile, & model_manager),
00525 sc_option(& OPT_SvmClassifierFile, & model_manager),
00526 st_option(& OPT_SvmTempFile, & model_manager),
00527 rf_option(& OPT_ResultsFile, & model_manager),
00528 in_option(& OPT_ImageName, & model_manager),
00529 sn_option(& OPT_SegmentNumber, & model_manager),
00530 gv_option(& OPT_GistVectors, & model_manager),
00531 vs_option(& OPT_VocabularySize, & model_manager)
00532 {
00533 model_manager.addSubComponent(configurator) ;
00534 model_manager.addSubComponent(brain) ;
00535 model_manager.addSubComponent(input_frame_series) ;
00536
00537 typedef BBoFSimulation me ;
00538 action_map[BBOF_SIFT_CMD] = & me::accumulate_sift_descriptors ;
00539 action_map[BBOF_VOCABULARY_CMD] = & me::compute_sift_vocabulary ;
00540 action_map[BBOF_HISTOGRAM_CMD] = & me::compute_training_histograms ;
00541 action_map[BBOF_SVM_CMD] = & me::generate_svm_classifier ;
00542 action_map[BBOF_CLASSIFY_CMD] = & me::classify_input_images ;
00543 action_map[BBOF_CLASSIFY_GV_CMD] = & me::classify_using_gist_vectors ;
00544 }
00545
00546 void BBoFSimulation::parse_command_line(int argc, const char* argv[])
00547 {
00548 model_manager.setOptionValString(& OPT_GistEstimatorType, "BBoF") ;
00549
00550 model_manager.setOptionValString(& OPT_SiftDescriptors,
00551 BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE) ;
00552 model_manager.setOptionValString(& OPT_SiftVocabulary,
00553 BBOF_DEFAULT_VOCABULARY_FILE) ;
00554 model_manager.setOptionValString(& OPT_HistogramsFile,
00555 BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE ) ;
00556 model_manager.setOptionValString(& OPT_SvmClassifierFile,
00557 BBOF_DEFAULT_SVM_CLASSIFIER_FILE ) ;
00558 model_manager.setOptionValString(& OPT_SvmTempFile,
00559 BBOF_DEFAULT_SVM_TEMP_FILE ) ;
00560 model_manager.setOptionValString(& OPT_ResultsFile,
00561 BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE) ;
00562
00563 model_manager.setOptionValString(& OPT_ImageName,
00564 BBOF_DEFAULT_IMAGE_NAME) ;
00565 model_manager.setOptionValString(& OPT_SegmentNumber,
00566 BBOF_DEFAULT_SEGMENT_NUMBER) ;
00567
00568 model_manager.setOptionValString(& OPT_GistVectors,
00569 BBOF_DEFAULT_GIST_VECTORS_FILE) ;
00570
00571 model_manager.setOptionValString(& OPT_VocabularySize,
00572 BBOF_DEFAULT_VOCABULARY_SIZE) ;
00573
00574 if (! model_manager.parseCommandLine(argc, argv, BBOF_ACTIONS, 1, 1))
00575 throw std::runtime_error("command line parse error") ;
00576 }
00577
00578
00579
00580 void BBoFSimulation::run()
00581 {
00582 std::string cmd(model_manager.getExtraArg(0)) ;
00583 ActionMap::iterator action = action_map.find(cmd) ;
00584 if (action == action_map.end())
00585 throw std::runtime_error(cmd + ": sorry, unknown action") ;
00586 (this->*(action->second))() ;
00587 }
00588
00589
00590
00591
00592 BBoFSimulation::~BBoFSimulation(){}
00593
00594
00595
00596
00597
00598 class ModelManagerStarter {
00599 ModelManager& mgr ;
00600 public :
00601 ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;}
00602 ~ModelManagerStarter() {mgr.stop() ;}
00603 } ;
00604
00605 }
00606
00607
00608
00609 int main(int argc, const char* argv[])
00610 {
00611 MYLOGVERB = LOG_INFO ;
00612 try
00613 {
00614 BBoFSimulation S("train-bbof Model") ;
00615 S.parse_command_line(argc, argv) ;
00616 S.run() ;
00617 }
00618 catch (std::exception& e)
00619 {
00620 LFATAL("%s", e.what()) ;
00621 return 1 ;
00622 }
00623 return 0 ;
00624 }
00625
00626
00627
00628
00629
00630 namespace {
00631
00632
00633 typedef GistEstimatorBeyondBoF::SiftGrid SiftGrid ;
00634
00635
00636
00637 class sift_descriptors_accumulator {
00638 sift_descriptors_accumulator() ;
00639 ~sift_descriptors_accumulator() ;
00640 public :
00641 static std::string output_file ;
00642 static std::string image_name ;
00643 static int frame_number ;
00644 static std::string segment_number ;
00645
00646 static void write(const SiftGrid&) ;
00647 } ;
00648
00649
00650
00651
00652
00653
00654
00655
00656 void BBoFSimulation::accumulate_sift_descriptors()
00657 {
00658 ModelManagerStarter M(model_manager) ;
00659
00660 nub::soft_ref<GistEstimatorBeyondBoF> ge =
00661 dynCastWeak<GistEstimatorBeyondBoF>(
00662 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
00663 if (ge.isInvalid())
00664 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
00665
00666 typedef sift_descriptors_accumulator acc ;
00667 acc::output_file = sift_descriptors_file() ;
00668 acc::image_name = image_name() ;
00669 acc::segment_number = segment_number() ;
00670 ge->setTrainingHook(acc::write) ;
00671
00672 nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
00673 for(;;)
00674 {
00675 try
00676 {
00677 acc::frame_number = input_frame_series->frame() ;
00678 if (event_queue->evolve() != SIM_CONTINUE)
00679 break ;
00680 }
00681 catch (lfatal_exception&)
00682 {
00683 return ;
00684 }
00685 }
00686 }
00687
00688
00689
00690
00691 std::string sift_descriptors_accumulator::output_file ;
00692 std::string sift_descriptors_accumulator::image_name ;
00693 int sift_descriptors_accumulator::frame_number ;
00694 std::string sift_descriptors_accumulator::segment_number ;
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725 void sift_descriptors_accumulator::write(const SiftGrid& G)
00726 {
00727 if (output_file.empty())
00728 throw std::runtime_error("SIFT descriptors accumulator output file "
00729 "not specified") ;
00730
00731 std::ofstream ofs(output_file.c_str(), std::ios::out | std::ios::app) ;
00732 for (int y = 0; y < G.getHeight(); ++y)
00733 for (int x = 0; x < G.getWidth(); ++x)
00734 ofs << image_name << ':' << frame_number << ' '
00735 << segment_number << ' '
00736 << y << ' ' << x << ' ' << G.getVal(x, y) << '\n' ;
00737 }
00738
00739 }
00740
00741
00742
00743 namespace {
00744
00745
00746 class OpenCVMatrix {
00747 CvMat* matrix ;
00748 public :
00749 OpenCVMatrix(int num_rows, int num_cols, int type) ;
00750 OpenCVMatrix(CvMat*) ;
00751 ~OpenCVMatrix() ;
00752
00753 int num_rows() const {return matrix->rows ;}
00754 int num_cols() const {return matrix->cols ;}
00755 int type() const {return CV_MAT_TYPE(matrix->type) ;}
00756
00757 template<typename T>
00758 T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;}
00759
00760 operator CvMat*() const {return matrix ;}
00761 } ;
00762
00763 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type)
00764 : matrix(cvCreateMat(num_rows, num_cols, type))
00765 {
00766 if (! matrix)
00767 throw std::runtime_error("unable to create OpenCV matrix") ;
00768 }
00769
00770 OpenCVMatrix::OpenCVMatrix(CvMat* M)
00771 : matrix(M)
00772 {
00773 if (! matrix)
00774 throw std::runtime_error("cannot create empty/null matrix") ;
00775 }
00776
00777 OpenCVMatrix::~OpenCVMatrix()
00778 {
00779 cvReleaseMat(& matrix) ;
00780 }
00781
00782 }
00783
00784
00785
00786
00787
00788 namespace {
00789
00790
00791 typedef Image<float> Vocabulary ;
00792
00793
00794 int count_lines(const std::string& file_name) ;
00795 CvMat* load_sift_descriptors(const std::string& file_name, int num_lines) ;
00796 CvMat* kmeans(int K, const OpenCVMatrix& data) ;
00797 void save_vocabulary(const OpenCVMatrix&, const std::string& file_name) ;
00798
00799
00800
00801
00802
00803 void BBoFSimulation::compute_sift_vocabulary()
00804 {
00805 LINFO("MVN: counting lines in %s", sift_descriptors_file().c_str()) ;
00806 int num_rows = count_lines(sift_descriptors_file()) ;
00807
00808 LINFO("MVN: reading %d SIFT descriptors from %s",
00809 num_rows, sift_descriptors_file().c_str()) ;
00810 OpenCVMatrix sift_descriptors =
00811 load_sift_descriptors(sift_descriptors_file(), num_rows) ;
00812
00813
00814
00815 const int K = vocabulary_size() ;
00816 LINFO("MVN: doing K-means on SIFT descriptors to get %d clusters", K) ;
00817 OpenCVMatrix vocabulary = kmeans(K, sift_descriptors) ;
00818
00819 LINFO("MVN: K-means done; saving SIFT vocabulary to %s",
00820 sift_vocabulary_file().c_str()) ;
00821 save_vocabulary(vocabulary, sift_vocabulary_file()) ;
00822 }
00823
00824
00825
00826
00827
00828
00829 CvMat* load_sift_descriptors(const std::string& file_name, int num_rows)
00830 {
00831 int num_cols = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ;
00832 CvMat* M = cvCreateMat(num_rows, num_cols, CV_32FC1) ;
00833
00834 double d ; std::string dummy ;
00835 std::ifstream ifs(file_name.c_str()) ;
00836 for (int i = 0; i < num_rows; ++i)
00837 {
00838 std::string str ;
00839 std::getline(ifs, str) ;
00840 if (! ifs || str.empty()) {
00841 if (i == num_rows - 1)
00842 break ;
00843 else {
00844 cvReleaseMat(& M) ;
00845 throw std::runtime_error(file_name +
00846 ": missing SIFT descriptors or other read error") ;
00847 }
00848 }
00849 std::istringstream line(str) ;
00850 line >> dummy >> dummy >> dummy >> dummy ;
00851
00852 for (int j = 0; j < num_cols; ++j) {
00853 if (! line) {
00854 cvReleaseMat(& M) ;
00855 throw std::runtime_error(file_name +
00856 ": missing SIFT descriptor values on line " + to_string(i)) ;
00857 }
00858 line >> d ;
00859 cvmSet(M, i, j, d) ;
00860 }
00861 }
00862
00863 return M ;
00864 }
00865
00866
00867 #ifndef BBOF_KMEANS_ITERATIONS
00868 #define BBOF_KMEANS_ITERATIONS (100)
00869 #endif
00870 #ifndef BBOF_KMEANS_PRECISION
00871 #define BBOF_KMEANS_PRECISION (.01)
00872 #endif
00873
00874
00875 CvMat* compute_centroids(int K, const OpenCVMatrix& data,
00876 const OpenCVMatrix& cluster_assignments) ;
00877
00878
00879
00880 CvMat* kmeans(int K, const OpenCVMatrix& data)
00881 {
00882 OpenCVMatrix cluster_assignments(data.num_rows(), 1, CV_32SC1) ;
00883
00884 LINFO("MVN: computing K-means cluster assignments with OpenCV") ;
00885 cvKMeans2(data, K, cluster_assignments,
00886 cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER,
00887 BBOF_KMEANS_ITERATIONS, BBOF_KMEANS_PRECISION)) ;
00888
00889 LINFO("MVN: cluster assignments done; computing centroids...") ;
00890 return compute_centroids(K, data, cluster_assignments) ;
00891 }
00892
00893
00894
00895
00896 CvMat* compute_centroids(int K, const OpenCVMatrix& data,
00897 const OpenCVMatrix& cluster_assignments)
00898 {
00899 CvMat* centroids = cvCreateMat(K, data.num_cols(), data.type()) ;
00900 cvZero(centroids) ;
00901
00902 std::vector<int> cluster_counts(K) ;
00903 std::fill(cluster_counts.begin(), cluster_counts.end(), 0) ;
00904
00905 for (int i = 0; i < data.num_rows(); ++i)
00906 {
00907 int C = cluster_assignments.get<int>(i, 0) ;
00908 ++cluster_counts[C] ;
00909
00910
00911 for (int j = 0; j < data.num_cols(); ++j)
00912 cvmSet(centroids, C, j,
00913 cvmGet(centroids, C, j) + data.get<float>(i, j)) ;
00914 }
00915
00916
00917
00918 for (int C = 0; C < K; ++C)
00919 for (int j = 0; j < data.num_cols(); ++j)
00920 cvmSet(centroids, C, j,
00921 cvmGet(centroids, C, j) / cluster_counts[C]) ;
00922
00923 return centroids ;
00924 }
00925
00926
00927 void save_vocabulary(const OpenCVMatrix& vocabulary,
00928 const std::string& file_name)
00929 {
00930 std::ofstream ofs(file_name.c_str()) ;
00931 for (int i = 0; i < vocabulary.num_rows(); ++i) {
00932 for (int j = 0; j < vocabulary.num_cols(); ++j)
00933 ofs << vocabulary.get<float>(i, j) << ' ' ;
00934 ofs << '\n' ;
00935 }
00936 }
00937
00938
00939 Vocabulary load_vocabulary(const std::string& file_name)
00940 {
00941 const int M = count_lines(file_name) ;
00942 const int N = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ;
00943 Vocabulary V(N, M, ZEROS) ;
00944
00945 float f ;
00946 std::ifstream ifs(file_name.c_str()) ;
00947 for (int j = 0; j < M; ++j)
00948 for (int i = 0; i < N; ++i) {
00949 if (! ifs)
00950 throw std::runtime_error(file_name + ": out of data?!?") ;
00951 ifs >> f ;
00952 V.setVal(i, j, f) ;
00953 }
00954
00955 return V ;
00956 }
00957
00958 }
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968 namespace {
00969
00970
00971 typedef Image<double> Histogram ;
00972
00973
00974 void save_histogram(const Histogram& histogram, const std::string& file_name,
00975 const std::string& image_name, int frame_number,
00976 const std::string& segment_number) ;
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986 void BBoFSimulation::compute_training_histograms()
00987 {
00988 ModelManagerStarter M(model_manager) ;
00989
00990 nub::soft_ref<GistEstimatorBeyondBoF> ge =
00991 dynCastWeak<GistEstimatorBeyondBoF>(
00992 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
00993 if (ge.isInvalid())
00994 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
00995
00996 Vocabulary V = load_vocabulary(sift_vocabulary_file()) ;
00997 ge->setVocabulary(V) ;
00998 LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s",
00999 V.getHeight(), sift_vocabulary_file().c_str()) ;
01000
01001 nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
01002 for(;;)
01003 {
01004 try
01005 {
01006 SeC<SimEventGistOutput> gist_out =
01007 event_queue->check<SimEventGistOutput>(brain.get(),
01008 SEQ_UNMARKED | SEQ_MARK,
01009 ge.get()) ;
01010 if (gist_out)
01011 save_histogram(ge->getGist(), histograms_file(),
01012 image_name(), input_frame_series->frame(),
01013 segment_number()) ;
01014 if (event_queue->evolve() != SIM_CONTINUE)
01015 break ;
01016 }
01017 catch (lfatal_exception&)
01018 {
01019 return ;
01020 }
01021 }
01022 }
01023
01024
01025
01026
01027
01028
01029
01030
01031 void save_histogram(const Histogram& histogram, const std::string& file_name,
01032 const std::string& image_name, int frame_number,
01033 const std::string& segment_number)
01034 {
01035 std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ;
01036 ofs << image_name << ':' << frame_number << ' '
01037 << segment_number << ' ' ;
01038 for (int y = 0; y < histogram.getHeight(); ++y)
01039 for (int x = 0; x < histogram.getWidth(); ++x)
01040 ofs << histogram.getVal(x, y) << ' ' ;
01041 ofs << '\n' ;
01042 }
01043
01044 }
01045
01046
01047
01048 namespace {
01049
01050
01051 void create_torch_dataset(const std::string&, const std::string&,
01052 const std::string&) ;
01053 Torch::SVMClassification* create_torch_classifier(const std::string&) ;
01054 std::string temp_file_name() ;
01055
01056
01057 void BBoFSimulation::generate_svm_classifier()
01058 {
01059 GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ;
01060
01061 create_torch_dataset(histograms_file(), segment_number(), svm_temp_file()) ;
01062 Torch::SVMClassification* svm = create_torch_classifier(svm_temp_file()) ;
01063 svm->save(svm_classifier_file().c_str()) ;
01064
01065 delete svm ;
01066 unlink(svm_temp_file().c_str()) ;
01067 }
01068
01069
01070 struct GistVector {
01071 std::vector<double> values ;
01072 GistVector() ;
01073 } ;
01074
01075 GistVector::GistVector()
01076 : values(GistEstimatorBeyondBoF::gist_vector_size())
01077 {}
01078
01079 std::istream& operator>>(std::istream& is, GistVector& g)
01080 {
01081 for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i)
01082 if (is)
01083 is >> g.values[i] ;
01084 else
01085 throw std::runtime_error("missing gist vector data") ;
01086 return is ;
01087 }
01088
01089 std::ostream& operator<<(std::ostream& os, const GistVector& g)
01090 {
01091 for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i)
01092 os << g.values[i] << ' ' ;
01093 return os ;
01094 }
01095
01096
01097
01098
01099
01100
01101
01102 void create_torch_dataset(const std::string& hist_file,
01103 const std::string& target,
01104 const std::string& torch_dataset)
01105 {
01106 const int n = count_lines(hist_file) ;
01107
01108 std::ifstream in(hist_file.c_str()) ;
01109 std::ofstream out(torch_dataset.c_str()) ;
01110
01111 std::string dummy, segment, str ;
01112 GistVector gist_vector ;
01113 out << n << ' ' << (GistEstimatorBeyondBoF::gist_vector_size() + 1) << '\n';
01114 for (int i = 0; i < n; ++i)
01115 {
01116 std::getline(in, str) ;
01117 if (! in || str.empty()) {
01118 if (i == n - 1)
01119 break ;
01120 else {
01121 out.close() ;
01122 unlink(torch_dataset.c_str()) ;
01123 throw std::runtime_error(hist_file +
01124 ": missing data or other read error") ;
01125 }
01126 }
01127 std::istringstream line(str) ;
01128 line >> dummy >> segment >> gist_vector ;
01129 out << gist_vector << ' ' << ((segment == target) ? +1 : -1) << '\n' ;
01130 }
01131 }
01132
01133
01134
01135 class HistIntKernel : public Torch::Kernel {
01136 real eval(Torch::Sequence*, Torch::Sequence*) ;
01137 } ;
01138
01139 real HistIntKernel::eval(Torch::Sequence* a, Torch::Sequence* b)
01140 {
01141 real sum = 0 ;
01142 for (int i = 0; i < a->frame_size; ++i)
01143 sum += min(a->frames[0][i], b->frames[0][i]) ;
01144 return sum ;
01145 }
01146
01147
01148
01149 Torch::SVMClassification* create_torch_classifier(const std::string& dataset)
01150 {
01151 HistIntKernel kernel ;
01152 Torch::SVMClassification* svm = new Torch::SVMClassification(& kernel) ;
01153 Torch::QCTrainer trainer(svm) ;
01154 Torch::MatDataSet data(dataset.c_str(),
01155 GistEstimatorBeyondBoF::gist_vector_size(), 1) ;
01156 trainer.train(& data, 0) ;
01157 return svm ;
01158 }
01159
01160 }
01161
01162
01163
01164 namespace {
01165
01166
01167 typedef std::vector<Torch::SVMClassification*> Classifiers ;
01168
01169
01170 Classifiers load_classifiers(std::string, HistIntKernel*) ;
01171 Histogram read_gist_vector(std::istream&) ;
01172 void classify_image(const Histogram&, const Classifiers&,
01173 const std::string&, int, const std::string&,
01174 const std::string&) ;
01175 void nuke_classifiers(Classifiers&) ;
01176
01177
01178
01179
01180
01181
01182 void BBoFSimulation::classify_input_images()
01183 {
01184 ModelManagerStarter M(model_manager) ;
01185
01186 nub::soft_ref<GistEstimatorBeyondBoF> ge =
01187 dynCastWeak<GistEstimatorBeyondBoF>(
01188 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
01189 if (ge.isInvalid())
01190 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
01191
01192 Vocabulary V = load_vocabulary(sift_vocabulary_file()) ;
01193 ge->setVocabulary(V) ;
01194 LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s",
01195 V.getHeight(), sift_vocabulary_file().c_str()) ;
01196
01197 HistIntKernel kernel ;
01198 Classifiers svm_classifiers =
01199 load_classifiers(svm_classifier_file(), & kernel) ;
01200
01201 nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
01202 for(;;)
01203 {
01204 try
01205 {
01206 SeC<SimEventGistOutput> gist_out =
01207 event_queue->check<SimEventGistOutput>(brain.get(),
01208 SEQ_UNMARKED | SEQ_MARK,
01209 ge.get()) ;
01210 if (gist_out)
01211 classify_image(ge->getGist(), svm_classifiers,
01212 image_name(), input_frame_series->frame(),
01213 segment_number(), results_file()) ;
01214 if (event_queue->evolve() != SIM_CONTINUE)
01215 break ;
01216 }
01217 catch (lfatal_exception&)
01218 {
01219 nuke_classifiers(svm_classifiers) ;
01220 return ;
01221 }
01222 }
01223
01224 nuke_classifiers(svm_classifiers) ;
01225 }
01226
01227
01228
01229
01230
01231 void BBoFSimulation::classify_using_gist_vectors()
01232 {
01233 GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ;
01234
01235 HistIntKernel kernel ;
01236 Classifiers svm_classifiers =
01237 load_classifiers(svm_classifier_file(), & kernel) ;
01238
01239 int line_number = 1 ;
01240 std::ifstream ifs(gist_vectors_file().c_str()) ;
01241 while (ifs)
01242 try
01243 {
01244 classify_image(read_gist_vector(ifs), svm_classifiers,
01245 image_name(), line_number++,
01246 segment_number(), results_file()) ;
01247 }
01248 catch (std::exception&)
01249 {
01250 }
01251
01252 nuke_classifiers(svm_classifiers) ;
01253 }
01254
01255
01256
01257
01258 void classify_image(const Histogram& gist_vector,
01259 const Classifiers& classifiers,
01260 const std::string& image_name, int frame_number,
01261 const std::string& ground_truth,
01262 const std::string& results_file)
01263 {
01264 std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ;
01265 ofs << image_name << ':' << frame_number << ' ' << ground_truth << ' ' ;
01266
01267 Torch::Sequence gv(1, GistEstimatorBeyondBoF::gist_vector_size()) ;
01268 std::copy(gist_vector.begin(), gist_vector.end(), gv.frames[0]) ;
01269
01270 int n = 0 ;
01271 const int N = classifiers.size() ;
01272 for (int i = 0; i < N; ++i) {
01273 classifiers[i]->forward(& gv) ;
01274 if (classifiers[i]->outputs->frames[0][0] > 0) {
01275 ofs << (i+1) << ' ' ;
01276 ++n ;
01277 }
01278 }
01279
01280 if (! n)
01281 ofs << '0' ;
01282 ofs << '\n' ;
01283 }
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297 Classifiers
01298 load_classifiers(std::string classifiers_root_name, HistIntKernel* kernel)
01299 {
01300 classifiers_root_name += ".*" ;
01301 glob_t buf ;
01302 if (glob(classifiers_root_name.c_str(), 0, 0, & buf) != 0)
01303 throw std::runtime_error("couldn't find/load the SVM classifiers") ;
01304
01305 const int N = buf.gl_pathc ;
01306 Classifiers classifiers(N) ;
01307 for (int i = 0; i < N; ++i) {
01308 classifiers[i] = new Torch::SVMClassification(kernel) ;
01309 classifiers[i]->load(buf.gl_pathv[i]) ;
01310 }
01311
01312 globfree(& buf) ;
01313 return classifiers ;
01314 }
01315
01316
01317 void nuke_classifiers(Classifiers& C)
01318 {
01319 const int N = C.size() ;
01320 for (int i = 0; i < N; ++i)
01321 delete C[i] ;
01322 }
01323
01324
01325 Histogram read_gist_vector(std::istream& is)
01326 {
01327 GistVector G ;
01328 is >> G ;
01329
01330 Histogram H(GistEstimatorBeyondBoF::gist_vector_size(), 1, NO_INIT) ;
01331 std::copy(G.values.begin(), G.values.end(), H.beginw()) ;
01332 return H ;
01333 }
01334
01335 }
01336
01337
01338
01339 namespace {
01340
01341
// Counts the lines in the named file. A final line without a terminating
// newline is counted as well. Returns -1 if the file cannot be opened.
int count_lines(const std::string& file_name)
{
   std::ifstream input(file_name.c_str()) ;

   // Each pass makes one read attempt. The last attempt is the one that
   // fails at end of file, so it is discounted from the result; if the
   // stream was never usable there are no attempts and the result is -1.
   int attempts = 0 ;
   std::string scratch ;
   for (; input; ++attempts)
      std::getline(input, scratch) ;

   return attempts - 1 ;
}
01354
01355
// True when d lies within machine epsilon (for double) of zero, bounds
// included. A NaN is never considered zero.
bool is_zero(double d)
{
   const double tolerance = std::numeric_limits<double>::epsilon() ;
   const double magnitude = std::fabs(d) ;
   return magnitude <= tolerance ;
}
01360
01361 }
01362
01363
01364
#endif // #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_LIBTORCH)
01366
01367
01368
01369
01370