/**
   \file Gist/train-bbof.C

   \brief Interface for training and testing GistEstimatorBeyondBoF.

   The train-bbof program in conjunction with the GistEstimatorBeyondBoF
   class implements the following paper within the INVT framework:

      Lazebnik, S., Schmid, C., Ponce, J.
      Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
         Natural Scene Categories
      CVPR, 2006.

   Whereas the GistEstimatorBeyondBoF class is only concerned with the
   portions of the above paper that deal with gist vector computations,
   this program provides the remaining structure required to implement
   the necessary training and image classification functionalities.

   train-bbof has two modes of operation, viz., training and testing.
   Training mode consists of four distinct phases: SIFT descriptor
   accumulation, K-means clustering, training histograms collection, and
   SVM generation. Testing mode operates in a single phase that uses the
   results of the clustering, histograms collection and SVM generation
   training phases to classify input images into appropriate categories.
*/

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S.
// patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.
// 00055 // //////////////////////////////////////////////////////////////////// // 00056 // 00057 // Primary maintainer for this file: Manu Viswanathan <mviswana at usc dot edu> 00058 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-bbof.C $ 00059 // $Id: train-bbof.C 14605 2011-03-15 02:25:06Z dparks $ 00060 // 00061 00062 //--------------------------- LIBRARY CHECK ----------------------------- 00063 00064 #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_LIBTORCH) 00065 00066 #include "Util/log.H" 00067 00068 int main() 00069 { 00070 LERROR("Sorry, this program needs the OpenCV and torch libraries.") ; 00071 return 255 ; 00072 } 00073 00074 #else // the actual program in all its hideous glory 00075 00076 //------------------------------ HEADERS -------------------------------- 00077 00078 #include "Image/OpenCVUtil.H" // must be first to avoid conflicting defs of int64, uint64 00079 00080 #include <fstream> 00081 00082 // Gist specific headers 00083 #include "Neuro/GistEstimatorBeyondBoF.H" 00084 00085 // Other INVT headers 00086 #include "Neuro/StdBrain.H" 00087 #include "Neuro/NeuroOpts.H" 00088 #include "Neuro/NeuroSimEvents.H" 00089 00090 #include "Media/SimFrameSeries.H" 00091 #include "Media/MediaOpts.H" 00092 00093 #include "Simulation/SimEventQueue.H" 00094 #include "Simulation/SimEventQueueConfigurator.H" 00095 00096 #include "Channels/ChannelOpts.H" 00097 #include "Component/ModelManager.H" 00098 #include "Component/ModelOptionDef.H" 00099 00100 #include "Image/Point2D.H" 00101 00102 #include "nub/ref.h" 00103 00104 // torch headers 00105 #include <torch/general.h> 00106 #include <torch/QCTrainer.h> 00107 #include <torch/SVMClassification.h> 00108 #include <torch/Kernel.h> 00109 #include <torch/MatDataSet.h> 00110 00111 // Unix headers 00112 #include <glob.h> 00113 #include <unistd.h> 00114 00115 // Standard C++ headers 00116 #include <sstream> 00117 #include <ios> 00118 #include <numeric> 00119 #include <algorithm> 00120 
#include <functional> 00121 #include <map> 00122 #include <vector> 00123 #include <iterator> 00124 #include <stdexcept> 00125 #include <utility> 00126 #include <limits> 00127 #include <cmath> 00128 00129 //------------------------ TEMPLATE UTILITIES --------------------------- 00130 00131 // Convenient (but perhaps not the most efficient) helper to convert 00132 // various data types to strings. 00133 // 00134 // DEVNOTE: Works as long as type T defines an operator << that writes to 00135 // an ostream. 00136 template<typename T> 00137 static std::string to_string(const T& t) 00138 { 00139 std::ostringstream str ; 00140 str << t ; 00141 return str.str() ; 00142 } 00143 00144 /// Read from string. As above, works as long as type T defines an 00145 /// operator >> that reads from an istream. 00146 template<typename T> 00147 static T from_string(const std::string& s, const T& defval = T()) 00148 { 00149 T t(defval) ; 00150 std::istringstream str(s) ; 00151 str >> t ; 00152 return t ; 00153 } 00154 00155 /// from_string() partial specialization for strings. If the client wants 00156 /// a string from the input string, we just return the input string. If 00157 /// we were apply the default version of this template function, we would 00158 /// end up parsing the input string as a whitespace separated string 00159 /// stream and only return the first string from this stream. 00160 template<> 00161 std::string from_string(const std::string& s, const std::string&) 00162 { 00163 return s ; 00164 } 00165 00166 //----------------------- COMMAND LINE OPTIONS -------------------------- 00167 00168 /** 00169 This program has five distinct phases/modes of operation, each one 00170 specified via a suitable non-option command line argument. 00171 Additionally, it supports several command line options to allow users 00172 to tweak various parameters such as the name of the vocabulary file, 00173 the training histograms database, and so on. 
00174 */ 00175 namespace { 00176 00177 const ModelOptionCateg MOC_BBOF = { 00178 MOC_SORTPRI_3, 00179 "Options specific to the Beyond Bag-of-Features program", 00180 } ; 00181 00182 /// In the SIFT descriptors accumulation phase, we collect all the 00183 /// descriptors from the training images and store them in a plain text 00184 /// file. 00185 #ifndef BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE 00186 #define BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE "sift_descriptors.txt" 00187 #endif 00188 00189 const ModelOptionDef OPT_SiftDescriptors = { 00190 MODOPT_ARG_STRING, "SiftDescriptors", & MOC_BBOF, OPTEXP_CORE, 00191 "This option specifies the name of the file where SIFT descriptors\n" 00192 "for the training images are to be accumulated. This is a plain text\n" 00193 "file containing the descriptors that will be fed into the K-means\n" 00194 "procedure during the second training phase.\n", 00195 "sift-descriptors", '\0', "sift-descriptors-file", 00196 BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE, 00197 } ; 00198 00199 /// In the second phase of training, we perform K-means clustering on the 00200 /// SIFT descriptors accumulated in the first phase and store the results 00201 /// in yet another plain text file. 00202 #ifndef BBOF_DEFAULT_VOCABULARY_FILE 00203 #define BBOF_DEFAULT_VOCABULARY_FILE "sift_vocabulary.txt" 00204 #endif 00205 00206 const ModelOptionDef OPT_SiftVocabulary = { 00207 MODOPT_ARG_STRING, "SiftVocabulary", & MOC_BBOF, OPTEXP_CORE, 00208 "This option specifies the name of the file in which the \"prototypical\"\n" 00209 "SIFT descriptors are (or are to be) stored. 
This is a plain text\n" 00210 "file containing the centroids of the K-means clusters, which are used\n" 00211 "during gist vector computation to create feature maps and, subsequently,\n" 00212 "the multi-level histograms using the spatial matching pyramid as\n" 00213 "described in the Lazebnik paper.\n", 00214 "sift-vocabulary", '\0', "sift-vocabulary-file", 00215 BBOF_DEFAULT_VOCABULARY_FILE, 00216 } ; 00217 00218 /// In the third phase of training, we compute and store the gist vectors 00219 /// for the training images. These gist vectors are used in the next 00220 /// training phase as the data points that will be used to create 00221 /// appropriate SVM classifiers for each image category. 00222 #ifndef BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE 00223 #define BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE "training_histograms.txt" 00224 #endif 00225 00226 const ModelOptionDef OPT_HistogramsFile = { 00227 MODOPT_ARG_STRING, "HistogramsFile", & MOC_BBOF, OPTEXP_CORE, 00228 "This option specifies the name of the training histograms database,\n" 00229 "a plain text file containing one histogram entry per line. The\n" 00230 "first field specifies the name plus number of the entry (e.g.,\n" 00231 "foo.mpg:1, bar.mpg:5, and so on). The second field specifies the ground\n" 00232 "truth for this particular image. The remaining fields are simply the\n" 00233 "4200 numbers making up the image's flattened out multi-level histogram,\n" 00234 "which serves as its gist vector.\n", 00235 "training-histograms", '\0', "training-histograms-file", 00236 BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE, 00237 } ; 00238 00239 /// In the fourth phase of training, we create SVM classifiers for each 00240 /// of the categories and store the relevant parameters to a text file 00241 /// for later use during image classification. Each segment will have its 00242 /// own SVM classifier. 
Therefore, the default value of this symbol is 00243 /// not a good one to use and it should be explicitly specified on the 00244 /// command line. 00245 #ifndef BBOF_DEFAULT_SVM_CLASSIFIER_FILE 00246 #define BBOF_DEFAULT_SVM_CLASSIFIER_FILE "svm_classifier.txt" 00247 #endif 00248 00249 const ModelOptionDef OPT_SvmClassifierFile = { 00250 MODOPT_ARG_STRING, "SvmClassifierFile", & MOC_BBOF, OPTEXP_CORE, 00251 "This option specifies the name of the file that will hold the SVM\n" 00252 "classifier for a given segment. This file is read and written by the\n" 00253 "torch library.", 00254 "svm-classifier", '\0', "svm-classifier-file", 00255 BBOF_DEFAULT_SVM_CLASSIFIER_FILE, 00256 } ; 00257 00258 /// While creating SVM classifiers for each of the categories, we need a 00259 /// temp file to store the training histograms data in the format 00260 /// required by the torch library. Usually, it would be a good idea to 00261 /// explicitly specify this on the command line rather than relying on 00262 /// the compiled in default. 00263 #ifndef BBOF_DEFAULT_SVM_TEMP_FILE 00264 #define BBOF_DEFAULT_SVM_TEMP_FILE "/tmp/train-bbof-torch-dataset.txt" 00265 #endif 00266 00267 const ModelOptionDef OPT_SvmTempFile = { 00268 MODOPT_ARG_STRING, "SvmTempFile", & MOC_BBOF, OPTEXP_CORE, 00269 "This option specifies the name of the temp file that will hold the SVM\n" 00270 "training data in the format required by the torch library. This file is\n" 00271 "is automatically deleted when it is no longer required.", 00272 "svm-temp", '\0', "svm-temp-file", 00273 BBOF_DEFAULT_SVM_TEMP_FILE, 00274 } ; 00275 00276 /// In image classification mode, we write the results to a plain text 00277 /// file. 
00278 #ifndef BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE 00279 #define BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE "bbof_classifications.txt" 00280 #endif 00281 00282 const ModelOptionDef OPT_ResultsFile = { 00283 MODOPT_ARG_STRING, "ResultsFile", & MOC_BBOF, OPTEXP_CORE, 00284 "This option specifies the name of the classification results file,\n" 00285 "a plain text file containing one result entry per line. The first\n" 00286 "field specifies the name of the input image plus number of the entry,\n" 00287 "(e.g., foo.mpg:1, bar.mpg:5, and so on). Then comes the ground truth\n" 00288 "for this image followed by its classification result.\n", 00289 "results-file", '\0', "classification-results-file", 00290 BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE, 00291 } ; 00292 00293 /// Several of the data files output by different operational modes of 00294 /// this program require inclusion of the current image/frame name and 00295 /// number and the ground truth segment/category number. These options 00296 /// allow users to specify appropriate values for this required info. 00297 /// 00298 /// NOTE: The default values for these options are not very useful. 00299 /// They really ought to be explicitly specified on the command line. 00300 #ifndef BBOF_DEFAULT_IMAGE_NAME 00301 #define BBOF_DEFAULT_IMAGE_NAME "some_image" 00302 #endif 00303 #ifndef BBOF_DEFAULT_SEGMENT_NUMBER 00304 #define BBOF_DEFAULT_SEGMENT_NUMBER "0" 00305 #endif 00306 00307 const ModelOptionDef OPT_ImageName = { 00308 MODOPT_ARG_STRING, "ImageName", & MOC_BBOF, OPTEXP_CORE, 00309 "This option specifies the \"root\" name for an image. The image number\n" 00310 "will be automatically appended to this \"root\" name with a colon as the\n" 00311 "separator between name and frame number. 
The current input MPEG file\n" 00312 "name is a good choice for the value of this option.\n", 00313 "image-name", '\0', "input-MPEG-file-name", 00314 BBOF_DEFAULT_IMAGE_NAME, 00315 } ; 00316 00317 const ModelOptionDef OPT_SegmentNumber = { 00318 MODOPT_ARG_STRING, "SegmentNumber", & MOC_BBOF, OPTEXP_CORE, 00319 "This option specifies the segment number for an image in the training\n" 00320 "set. The segment number is used to specify the ground truth for the\n" 00321 "image classification.\n", 00322 "segment-number", '\0', "image-segment-number", 00323 BBOF_DEFAULT_SEGMENT_NUMBER, 00324 } ; 00325 00326 /// classification can be performed either with input images or with 00327 /// precomputed gist vectors. These precomputed vectors are stored in a 00328 /// plain text file. This is the default name of that file. 00329 #ifndef BBOF_DEFAULT_GIST_VECTORS_FILE 00330 #define BBOF_DEFAULT_GIST_VECTORS_FILE "gist_vectors.txt" 00331 #endif 00332 00333 const ModelOptionDef OPT_GistVectors = { 00334 MODOPT_ARG_STRING, "GistVectors", & MOC_BBOF, OPTEXP_CORE, 00335 "This option specifies the name of the file in which gist vectors are\n" 00336 "stored. This option is used when classification must be performed\n" 00337 "using gist vectors computed by some other entity. This is useful, for\n" 00338 "example, when a client program running on some other host computes gist\n" 00339 "vectors and passes these vectors to this program for classification.\n" 00340 "In such situations, we do not need to read images and compute gist\n" 00341 "vectors for them. Instead, we bypass all that and simply perform the\n" 00342 "classification using the precomputed vectors.\n", 00343 "gist-vectors", '\0', "gist-vectors-file", 00344 BBOF_DEFAULT_GIST_VECTORS_FILE, 00345 } ; 00346 00347 /// The vocabulary consists of a bunch of "prototypical" SIFT descriptors 00348 /// that are obtained by clustering the SIFT descriptors for the training 00349 /// images. 
It is possible to change the size of the vocabulary. But the 00350 /// default is 200. 00351 #ifndef BBOF_DEFAULT_VOCABULARY_SIZE 00352 #define BBOF_DEFAULT_VOCABULARY_SIZE "200" 00353 #endif 00354 00355 const ModelOptionDef OPT_VocabularySize = { 00356 MODOPT_ARG_STRING, "VocabularySize", & MOC_BBOF, OPTEXP_CORE, 00357 "This option specifies the size of the SIFT vocabulary.\n", 00358 "vocabulary-size", '\0', "vocabulary-size", 00359 BBOF_DEFAULT_VOCABULARY_SIZE, 00360 } ; 00361 00362 /** 00363 The different operational modes of this program must be specified as 00364 the one and only non-option command line argument. This "action" 00365 command must be one of the following strings (case-sensitive!): 00366 00367 1. sift -- accumulate the SIFT descriptors for the training images 00368 in the plain text file specified by the --sift-descriptors option. 00369 By default, the descriptors will be accumulated in 00370 ./sift_descriptors.txt. 00371 00372 Additionally, the --image-name and --segment-number options are 00373 required as this information is also recorded in the SIFT 00374 descriptors file. 00375 00376 2. vocab -- compute the SIFT descriptors vocabulary, i.e., the 00377 "protototypical" SIFT descriptors, from the accumulated 00378 descriptors using the OpenCV K-means implementation. 00379 00380 For this action, the --sift-descriptors option specifies the input 00381 file for the K-means while the --sift-vocabulary option specifies 00382 the output file. The defaults are to read from 00383 ./sift_descriptors.txt and write to ./sift_vocabulary.txt. 00384 00385 3. hist -- compute the flattened out multi-level histograms for the 00386 training set. The output is sent to the text file specified by the 00387 --histograms-file option. 00388 00389 The --image-name and --segment-number options are also required. 00390 00391 4. svm -- generate the SVM classifiers for each of the categories. 
00392 The --svm-classifier file specifies the name of the file to which 00393 the SVM parameters will be stored. By default, this is 00394 ./svm_classifier.txt. Users should supply a file name different 00395 from the default. Otherwise, this file will get overwritten for 00396 each segment. 00397 00398 The --histograms-file can be used to specify the input data for 00399 this action. 00400 00401 In addition to the above two options, this action also needs the 00402 --svm-temp option to store the histograms data in the format 00403 required by the torch library. The default value is okay for this 00404 option. However, if several instances of this program can be 00405 executing in parallel, it would be best to supply different temp 00406 files explicitly on the command line. 00407 00408 5. classify -- uses the vocabulary and SVM classifiers produced by the 00409 vocab and svm actions to classify the input images streaming in. 00410 Classification results are written, by default, to 00411 ./classification_results.txt; but this can be changed with the 00412 --results-file option. 00413 00414 The --sift-vocabulary and --svm-classifier options can be used to 00415 specify appropriate values for the different pieces of input 00416 required by the classify action. Note that the --svm-classifier 00417 option does not point to a specific classifier, but really is a 00418 "root" name to use. This program will automatically load all the 00419 classifiers that begin with is this root. For example, if the user 00420 specifies --svm-classifier="ACB_svm_classifier", this program will 00421 load all the classifiers whose file names begin with 00422 "ACB_svm_classifier." and append numbers starting at 1. 00423 00424 6. 
classify_gv -- same action as classify except that input images are 00425 not used; instead, we assume that some other entity (e.g., a client 00426 application running on a phone) has computed the gist vector for 00427 one or more images and is passing these vectors via the 00428 --gist-vector option. 00429 */ 00430 #ifndef BBOF_SIFT_CMD 00431 #define BBOF_SIFT_CMD "sift" 00432 #endif 00433 #ifndef BBOF_VOCABULARY_CMD 00434 #define BBOF_VOCABULARY_CMD "vocab" 00435 #endif 00436 #ifndef BBOF_HISTOGRAM_CMD 00437 #define BBOF_HISTOGRAM_CMD "hist" 00438 #endif 00439 #ifndef BBOF_SVM_CMD 00440 #define BBOF_SVM_CMD "svm" 00441 #endif 00442 #ifndef BBOF_CLASSIFY_CMD 00443 #define BBOF_CLASSIFY_CMD "classify" 00444 #endif 00445 #ifndef BBOF_CLASSIFY_GV_CMD 00446 #define BBOF_CLASSIFY_GV_CMD "classify_gv" 00447 #endif 00448 00449 // For printing usage info 00450 #ifndef BBOF_ACTIONS 00451 #define BBOF_ACTIONS ("{"BBOF_SIFT_CMD"|"BBOF_VOCABULARY_CMD"|"\ 00452 BBOF_HISTOGRAM_CMD"|"BBOF_SVM_CMD"|"\ 00453 BBOF_CLASSIFY_CMD"|"BBOF_CLASSIFY_GV_CMD"}") 00454 #endif 00455 00456 } // end of local namespace encapsulating command line options section 00457 00458 //--------------------- SIMULATION ENCAPSULATION ------------------------ 00459 00460 // The following helper class wraps around the ModelManager and 00461 // associated objects, providing a neatly encapsulated API for the main 00462 // program. 
00463 namespace { 00464 00465 class BBoFSimulation { 00466 ModelManager model_manager ; 00467 nub::soft_ref<SimEventQueueConfigurator> configurator ; 00468 nub::soft_ref<StdBrain> brain ; 00469 nub::ref<SimInputFrameSeries> input_frame_series ; 00470 00471 // Various command line options specific to this program 00472 OModelParam<std::string> sd_option ; // --sift-descriptors 00473 OModelParam<std::string> sv_option ; // --sift-vocabulary 00474 OModelParam<std::string> th_option ; // --training-histograms 00475 OModelParam<std::string> sc_option ; // --svm-classifier 00476 OModelParam<std::string> st_option ; // --svm-temp 00477 OModelParam<std::string> rf_option ; // --results-file 00478 OModelParam<std::string> in_option ; // --image-name (not --in!) 00479 OModelParam<std::string> sn_option ; // --segment-number 00480 OModelParam<std::string> gv_option ; // --gist-vectors 00481 OModelParam<std::string> vs_option ; // --vocabulary-size 00482 00483 public : 00484 BBoFSimulation(const std::string& model_name) ; 00485 void parse_command_line(int argc, const char* argv[]) ; 00486 void run() ; 00487 ~BBoFSimulation() ; 00488 00489 private : 00490 // The different actions performed by this program 00491 typedef void (BBoFSimulation::*Action)() ; 00492 typedef std::map<std::string, Action> ActionMap ; 00493 ActionMap action_map ; 00494 00495 void accumulate_sift_descriptors() ; 00496 void compute_sift_vocabulary() ; 00497 void compute_training_histograms() ; 00498 void generate_svm_classifier() ; 00499 void classify_input_images() ; 00500 void classify_using_gist_vectors() ; 00501 00502 // Accessors for retrieving some of the command line arguments 00503 std::string sift_descriptors_file() {return sd_option.getVal() ;} 00504 std::string sift_vocabulary_file() {return sv_option.getVal() ;} 00505 std::string histograms_file() {return th_option.getVal() ;} 00506 std::string svm_classifier_file() {return sc_option.getVal() ;} 00507 std::string svm_temp_file() {return 
st_option.getVal() ;} 00508 std::string results_file() {return rf_option.getVal() ;} 00509 std::string image_name() {return in_option.getVal() ;} 00510 std::string segment_number() {return sn_option.getVal() ;} 00511 std::string gist_vectors_file() {return gv_option.getVal() ;} 00512 int vocabulary_size() {return from_string<int>(vs_option.getVal()) ;} 00513 } ; 00514 00515 // On instantiation, create the model manager and the simulation's 00516 // various components. 00517 BBoFSimulation::BBoFSimulation(const std::string& model_name) 00518 : model_manager(model_name), 00519 configurator(new SimEventQueueConfigurator(model_manager)), 00520 brain(new StdBrain(model_manager)), 00521 input_frame_series(new SimInputFrameSeries(model_manager)), 00522 sd_option(& OPT_SiftDescriptors, & model_manager), 00523 sv_option(& OPT_SiftVocabulary, & model_manager), 00524 th_option(& OPT_HistogramsFile, & model_manager), 00525 sc_option(& OPT_SvmClassifierFile, & model_manager), 00526 st_option(& OPT_SvmTempFile, & model_manager), 00527 rf_option(& OPT_ResultsFile, & model_manager), 00528 in_option(& OPT_ImageName, & model_manager), 00529 sn_option(& OPT_SegmentNumber, & model_manager), 00530 gv_option(& OPT_GistVectors, & model_manager), 00531 vs_option(& OPT_VocabularySize, & model_manager) 00532 { 00533 model_manager.addSubComponent(configurator) ; 00534 model_manager.addSubComponent(brain) ; 00535 model_manager.addSubComponent(input_frame_series) ; 00536 00537 typedef BBoFSimulation me ; // typing shortcut 00538 action_map[BBOF_SIFT_CMD] = & me::accumulate_sift_descriptors ; 00539 action_map[BBOF_VOCABULARY_CMD] = & me::compute_sift_vocabulary ; 00540 action_map[BBOF_HISTOGRAM_CMD] = & me::compute_training_histograms ; 00541 action_map[BBOF_SVM_CMD] = & me::generate_svm_classifier ; 00542 action_map[BBOF_CLASSIFY_CMD] = & me::classify_input_images ; 00543 action_map[BBOF_CLASSIFY_GV_CMD] = & me::classify_using_gist_vectors ; 00544 } 00545 00546 void 
BBoFSimulation::parse_command_line(int argc, const char* argv[]) 00547 { 00548 model_manager.setOptionValString(& OPT_GistEstimatorType, "BBoF") ; 00549 00550 model_manager.setOptionValString(& OPT_SiftDescriptors, 00551 BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE) ; 00552 model_manager.setOptionValString(& OPT_SiftVocabulary, 00553 BBOF_DEFAULT_VOCABULARY_FILE) ; 00554 model_manager.setOptionValString(& OPT_HistogramsFile, 00555 BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE ) ; 00556 model_manager.setOptionValString(& OPT_SvmClassifierFile, 00557 BBOF_DEFAULT_SVM_CLASSIFIER_FILE ) ; 00558 model_manager.setOptionValString(& OPT_SvmTempFile, 00559 BBOF_DEFAULT_SVM_TEMP_FILE ) ; 00560 model_manager.setOptionValString(& OPT_ResultsFile, 00561 BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE) ; 00562 00563 model_manager.setOptionValString(& OPT_ImageName, 00564 BBOF_DEFAULT_IMAGE_NAME) ; 00565 model_manager.setOptionValString(& OPT_SegmentNumber, 00566 BBOF_DEFAULT_SEGMENT_NUMBER) ; 00567 00568 model_manager.setOptionValString(& OPT_GistVectors, 00569 BBOF_DEFAULT_GIST_VECTORS_FILE) ; 00570 00571 model_manager.setOptionValString(& OPT_VocabularySize, 00572 BBOF_DEFAULT_VOCABULARY_SIZE) ; 00573 00574 if (! model_manager.parseCommandLine(argc, argv, BBOF_ACTIONS, 1, 1)) 00575 throw std::runtime_error("command line parse error") ; 00576 } 00577 00578 // To run the simulation, we simply dispatch to the function 00579 // corresponding to the action (non-option) command line argument. 00580 void BBoFSimulation::run() 00581 { 00582 std::string cmd(model_manager.getExtraArg(0)) ; 00583 ActionMap::iterator action = action_map.find(cmd) ; 00584 if (action == action_map.end()) 00585 throw std::runtime_error(cmd + ": sorry, unknown action") ; 00586 (this->*(action->second))() ; 00587 } 00588 00589 // Do we really not have to delete the configurator, brain and input 00590 // frame series? If it turns out we do, this empty destructor will have 00591 // to be filled out with the necessary delete calls... 
00592 BBoFSimulation::~BBoFSimulation(){} 00593 00594 // Quick helper class to start and stop model manager (useful when 00595 // exceptions are thrown because destructor automatically stops the model 00596 // manager without requiring an explicit call to the stop method prior to 00597 // throwing the exception). 00598 class ModelManagerStarter { 00599 ModelManager& mgr ; 00600 public : 00601 ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;} 00602 ~ModelManagerStarter() {mgr.stop() ;} 00603 } ; 00604 00605 } // end of local namespace encapsulating simulation encapsulation section 00606 00607 //------------------------------- MAIN ---------------------------------- 00608 00609 int main(int argc, const char* argv[]) 00610 { 00611 MYLOGVERB = LOG_INFO ; // suppress debug messages 00612 try 00613 { 00614 BBoFSimulation S("train-bbof Model") ; 00615 S.parse_command_line(argc, argv) ; 00616 S.run() ; 00617 } 00618 catch (std::exception& e) 00619 { 00620 LFATAL("%s", e.what()) ; 00621 return 1 ; 00622 } 00623 return 0 ; 00624 } 00625 00626 //------------------- SIFT DESCRIPTORS ACCUMULATION --------------------- 00627 00628 // This section contains the code for accumulating the SIFT descriptors 00629 // of the training images, i.e., phase one of training. 00630 namespace { 00631 00632 // Useful shortcut 00633 typedef GistEstimatorBeyondBoF::SiftGrid SiftGrid ; 00634 00635 // Quick helper for storing the SIFT descriptors of the training images 00636 // to a file. 00637 class sift_descriptors_accumulator { 00638 sift_descriptors_accumulator() ; // private to disallow instantiation 00639 ~sift_descriptors_accumulator() ; 00640 public : 00641 static std::string output_file ; 00642 static std::string image_name ; 00643 static int frame_number ; 00644 static std::string segment_number ; 00645 00646 static void write(const SiftGrid&) ; 00647 } ; 00648 00649 // This method implements the simulation's main loop for the "sift" 00650 // action. 
Prior to starting the main loop though, it configures the 00651 // BBoF gist estimator's training callback, which is triggered at each 00652 // step of the brain's evolution. The BBoF gist estimator passes the 00653 // SIFT descriptors for the current input image to this callback, which 00654 // then proceeds to accumulate them in the file specified by the 00655 // --sift-descriptors option. 00656 void BBoFSimulation::accumulate_sift_descriptors() 00657 { 00658 ModelManagerStarter M(model_manager) ; 00659 00660 nub::soft_ref<GistEstimatorBeyondBoF> ge = 00661 dynCastWeak<GistEstimatorBeyondBoF>( 00662 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ; 00663 if (ge.isInvalid()) 00664 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ; 00665 00666 typedef sift_descriptors_accumulator acc ; 00667 acc::output_file = sift_descriptors_file() ; 00668 acc::image_name = image_name() ; 00669 acc::segment_number = segment_number() ; 00670 ge->setTrainingHook(acc::write) ; 00671 00672 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 00673 for(;;) 00674 { 00675 try 00676 { 00677 acc::frame_number = input_frame_series->frame() ; 00678 if (event_queue->evolve() != SIM_CONTINUE) 00679 break ; 00680 } 00681 catch (lfatal_exception&) // if we seek beyond end of frame series 00682 { 00683 return ; // prevent LFATAL induced abortion 00684 } 00685 } 00686 } 00687 00688 // Static data members for storing the SIFT descriptors file name and 00689 // other pertinent info persistently across multiple invocations of the 00690 // GistEstimatorBeyondBoF's training hook. 00691 std::string sift_descriptors_accumulator::output_file ; 00692 std::string sift_descriptors_accumulator::image_name ; 00693 int sift_descriptors_accumulator::frame_number ; 00694 std::string sift_descriptors_accumulator::segment_number ; 00695 00696 // The following function is meant to be used as the 00697 // GistEstimatorBeyondBoF training hook. 
// It simply appends the SIFT
// descriptors grid passed to it (stored as an
// Image<GistEstimatorBeyondBoF::SiftDescriptor>) to the output file.
// The format of this file is as shown below:
//
// MPEG-file-name:frame-number segment-number row col SIFT-descriptor
//
// The MPEG file name should be explicitly specified with the
// --image-name option. The frame number is extracted automatically from
// the input frame series. The segment number represents the ground
// truth for the input image's category and should be specified
// explicitly on the command line with the --segment-number option. The
// row and col values are the SIFT grid coordinates. And, finally, the
// SIFT descriptor itself consists of 128 numbers.
//
// DEVNOTE: We could open the output file once and use that object to
// avoid reopening (by using a static ostream data member rather than a
// static string). However, if the program were to somehow crash halfway
// through, then the training SIFT descriptors output file would be in an
// inconsistent state and rerunning the program can result in appending
// data to a possibly inconsistent dataset, which would only make things
// worse.
//
// Thus, we choose to open and close the output file each time the
// GistEstimatorBeyondBoF training hook is triggered. (Of course, if the
// program crashes while this function is executing, then all bets are
// off; the training SIFT descriptors file's inconsistency will be
// unavoidable in this case.)
00725 void sift_descriptors_accumulator::write(const SiftGrid& G) 00726 { 00727 if (output_file.empty()) 00728 throw std::runtime_error("SIFT descriptors accumulator output file " 00729 "not specified") ; 00730 00731 std::ofstream ofs(output_file.c_str(), std::ios::out | std::ios::app) ; 00732 for (int y = 0; y < G.getHeight(); ++y) 00733 for (int x = 0; x < G.getWidth(); ++x) 00734 ofs << image_name << ':' << frame_number << ' ' 00735 << segment_number << ' ' 00736 << y << ' ' << x << ' ' << G.getVal(x, y) << '\n' ; 00737 } 00738 00739 } // end of local namespace encapsulating SIFT descriptors accumulation section 00740 00741 //-------------------------- OPENCV MATRICES ---------------------------- 00742 00743 namespace { 00744 00745 // Crude encapsulation of OpenCV matrices 00746 class OpenCVMatrix { 00747 CvMat* matrix ; 00748 public : 00749 OpenCVMatrix(int num_rows, int num_cols, int type) ; 00750 OpenCVMatrix(CvMat*) ; 00751 ~OpenCVMatrix() ; 00752 00753 int num_rows() const {return matrix->rows ;} 00754 int num_cols() const {return matrix->cols ;} 00755 int type() const {return CV_MAT_TYPE(matrix->type) ;} 00756 00757 template<typename T> // T must match matrix->type (float for CV_32FC1, etc.) 00758 T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;} 00759 00760 operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea) 00761 } ; 00762 00763 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type) 00764 : matrix(cvCreateMat(num_rows, num_cols, type)) 00765 { 00766 if (! matrix) 00767 throw std::runtime_error("unable to create OpenCV matrix") ; 00768 } 00769 00770 OpenCVMatrix::OpenCVMatrix(CvMat* M) 00771 : matrix(M) 00772 { 00773 if (! 
matrix) 00774 throw std::runtime_error("cannot create empty/null matrix") ; 00775 } 00776 00777 OpenCVMatrix::~OpenCVMatrix() 00778 { 00779 cvReleaseMat(& matrix) ; 00780 } 00781 00782 } // end of local namespace encapsulating OpenCV matrices section 00783 00784 //-------------------- SIFT VOCABULARY COMPUTATION ---------------------- 00785 00786 // This section contains the code for the K-means clustering of the SIFT 00787 // descriptors of the training images (i.e., training phase two). 00788 namespace { 00789 00790 // Useful types 00791 typedef Image<float> Vocabulary ; 00792 00793 // Forward declarations 00794 int count_lines(const std::string& file_name) ; 00795 CvMat* load_sift_descriptors(const std::string& file_name, int num_lines) ; 00796 CvMat* kmeans(int K, const OpenCVMatrix& data) ; 00797 void save_vocabulary(const OpenCVMatrix&, const std::string& file_name) ; 00798 00799 // The following method implements the "vocab" action of this program 00800 // for clustering the SIFT descriptors of the training images to obtain 00801 // the 200 "prototypical" SIFT descriptors that form the basis of the 00802 // gist vector computation in terms of these "words" or "vis-terms". 
00803 void BBoFSimulation::compute_sift_vocabulary() 00804 { 00805 LINFO("MVN: counting lines in %s", sift_descriptors_file().c_str()) ; 00806 int num_rows = count_lines(sift_descriptors_file()) ; 00807 00808 LINFO("MVN: reading %d SIFT descriptors from %s", 00809 num_rows, sift_descriptors_file().c_str()) ; 00810 OpenCVMatrix sift_descriptors = 00811 load_sift_descriptors(sift_descriptors_file(), num_rows) ; 00812 00813 //GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ; 00814 //const int K = GistEstimatorBeyondBoF::num_channels() ; 00815 const int K = vocabulary_size() ; 00816 LINFO("MVN: doing K-means on SIFT descriptors to get %d clusters", K) ; 00817 OpenCVMatrix vocabulary = kmeans(K, sift_descriptors) ; 00818 00819 LINFO("MVN: K-means done; saving SIFT vocabulary to %s", 00820 sift_vocabulary_file().c_str()) ; 00821 save_vocabulary(vocabulary, sift_vocabulary_file()) ; 00822 } 00823 00824 // The following function reads the SIFT descriptors for the training 00825 // images into an OpenCV matrix. It must know how many lines the SIFT 00826 // descriptors file has. This quantity is the number of rows in resulting 00827 // matrix. The number of columns is simply the size of each SIFT 00828 // descriptor (usually: 128 values make up a SIFT descriptor). 00829 CvMat* load_sift_descriptors(const std::string& file_name, int num_rows) 00830 { 00831 int num_cols = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ; 00832 CvMat* M = cvCreateMat(num_rows, num_cols, CV_32FC1) ; 00833 00834 double d ; std::string dummy ; // for ignoring first four fields 00835 std::ifstream ifs(file_name.c_str()) ; 00836 for (int i = 0; i < num_rows; ++i) 00837 { 00838 std::string str ; 00839 std::getline(ifs, str) ; 00840 if (! 
ifs || str.empty()) { 00841 if (i == num_rows - 1) // okay; read all rows 00842 break ; 00843 else { // descriptors file missing data or some other error 00844 cvReleaseMat(& M) ; 00845 throw std::runtime_error(file_name + 00846 ": missing SIFT descriptors or other read error") ; 00847 } 00848 } 00849 std::istringstream line(str) ; 00850 line >> dummy >> dummy >> dummy >> dummy ; 00851 00852 for (int j = 0; j < num_cols; ++j) { 00853 if (! line) { 00854 cvReleaseMat(& M) ; 00855 throw std::runtime_error(file_name + 00856 ": missing SIFT descriptor values on line " + to_string(i)) ; 00857 } 00858 line >> d ; 00859 cvmSet(M, i, j, d) ; 00860 } 00861 } 00862 00863 return M ; 00864 } 00865 00866 // K-means parameters 00867 #ifndef BBOF_KMEANS_ITERATIONS 00868 #define BBOF_KMEANS_ITERATIONS (100) 00869 #endif 00870 #ifndef BBOF_KMEANS_PRECISION 00871 #define BBOF_KMEANS_PRECISION (.01) 00872 #endif 00873 00874 // Forward declaration 00875 CvMat* compute_centroids(int K, const OpenCVMatrix& data, 00876 const OpenCVMatrix& cluster_assignments) ; 00877 00878 // This function performs K-means clustering on the supplied data matrix 00879 // and returns the cluster centers. 00880 CvMat* kmeans(int K, const OpenCVMatrix& data) 00881 { 00882 OpenCVMatrix cluster_assignments(data.num_rows(), 1, CV_32SC1) ; 00883 00884 LINFO("MVN: computing K-means cluster assignments with OpenCV") ; 00885 cvKMeans2(data, K, cluster_assignments, 00886 cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER, 00887 BBOF_KMEANS_ITERATIONS, BBOF_KMEANS_PRECISION)) ; 00888 00889 LINFO("MVN: cluster assignments done; computing centroids...") ; 00890 return compute_centroids(K, data, cluster_assignments) ; 00891 } 00892 00893 // OpenCV's K-means implementation returns cluster assignments. But we 00894 // need the cluster centroids. This function takes the data matrix and 00895 // cluster assignments and returns the K centroids. 
00896 CvMat* compute_centroids(int K, const OpenCVMatrix& data, 00897 const OpenCVMatrix& cluster_assignments) 00898 { 00899 CvMat* centroids = cvCreateMat(K, data.num_cols(), data.type()) ; 00900 cvZero(centroids) ; 00901 00902 std::vector<int> cluster_counts(K) ; 00903 std::fill(cluster_counts.begin(), cluster_counts.end(), 0) ; 00904 00905 for (int i = 0; i < data.num_rows(); ++i) 00906 { 00907 int C = cluster_assignments.get<int>(i, 0) ; 00908 ++cluster_counts[C] ; 00909 00910 // Compute sum of C-th centroid and i-th row 00911 for (int j = 0; j < data.num_cols(); ++j) 00912 cvmSet(centroids, C, j, 00913 cvmGet(centroids, C, j) + data.get<float>(i, j)) ; 00914 } 00915 00916 // Compute the K centroids by averaging the totals accumulated in the 00917 // centroids matrix using the cluster counts. 00918 for (int C = 0; C < K; ++C) 00919 for (int j = 0; j < data.num_cols(); ++j) 00920 cvmSet(centroids, C, j, 00921 cvmGet(centroids, C, j) / cluster_counts[C]) ; 00922 00923 return centroids ; 00924 } 00925 00926 // Write the SIFT vocabulary, row by row, to a plain text file. 00927 void save_vocabulary(const OpenCVMatrix& vocabulary, 00928 const std::string& file_name) 00929 { 00930 std::ofstream ofs(file_name.c_str()) ; 00931 for (int i = 0; i < vocabulary.num_rows(); ++i) { 00932 for (int j = 0; j < vocabulary.num_cols(); ++j) 00933 ofs << vocabulary.get<float>(i, j) << ' ' ; 00934 ofs << '\n' ; 00935 } 00936 } 00937 00938 // Read the SIFT vocabulary from a plain text file into an Image<T> 00939 Vocabulary load_vocabulary(const std::string& file_name) 00940 { 00941 const int M = count_lines(file_name) ; 00942 const int N = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ; 00943 Vocabulary V(N, M, ZEROS) ; 00944 00945 float f ; 00946 std::ifstream ifs(file_name.c_str()) ; 00947 for (int j = 0; j < M; ++j) 00948 for (int i = 0; i < N; ++i) { 00949 if (! 
ifs) 00950 throw std::runtime_error(file_name + ": out of data?!?") ; 00951 ifs >> f ; 00952 V.setVal(i, j, f) ; 00953 } 00954 00955 return V ; 00956 } 00957 00958 } // end of local namespace encapsulating SIFT vocabulary computation section 00959 00960 //------------------- TRAINING HISTOGRAM PROCESSING --------------------- 00961 00962 // Training is a two step process: first, we use K-means to cluster the 00963 // training set's SIFT descriptors to create the vocabulary of 00964 // "prototypical" SIFT descriptors. Then, we collect the histograms 00965 // counting these "vis-terms" in the training images. The vocabulary of 00966 // prototypical SIFT descriptors (or vis-terms) and the training set's 00967 // histogram "database" are both used for image classification. 00968 namespace { 00969 00970 // Some useful types for dealing with vis-term histograms 00971 typedef Image<double> Histogram ; 00972 00973 // Forward declarations 00974 void save_histogram(const Histogram& histogram, const std::string& file_name, 00975 const std::string& image_name, int frame_number, 00976 const std::string& segment_number) ; 00977 00978 // This method implements the "hist" action of this program. Like the 00979 // accumulate action, it implements a "main loop" for the simulation, 00980 // evolving different components with each iteration. But rather than 00981 // dipping into the GistEstimatorBeyondBoF's processing pipeline, it 00982 // loads the SIFT vocabulary and then uses GistEstimatorBeyondBoF to 00983 // obtain the flattened out multi-level histogram for each of the 00984 // training images. These histograms are saved to the training 00985 // histograms database specified by the --histograms-file option. 
00986 void BBoFSimulation::compute_training_histograms() 00987 { 00988 ModelManagerStarter M(model_manager) ; 00989 00990 nub::soft_ref<GistEstimatorBeyondBoF> ge = 00991 dynCastWeak<GistEstimatorBeyondBoF>( 00992 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ; 00993 if (ge.isInvalid()) 00994 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ; 00995 00996 Vocabulary V = load_vocabulary(sift_vocabulary_file()) ; 00997 ge->setVocabulary(V) ; 00998 LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s", 00999 V.getHeight(), sift_vocabulary_file().c_str()) ; 01000 01001 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 01002 for(;;) 01003 { 01004 try 01005 { 01006 SeC<SimEventGistOutput> gist_out = 01007 event_queue->check<SimEventGistOutput>(brain.get(), 01008 SEQ_UNMARKED | SEQ_MARK, 01009 ge.get()) ; 01010 if (gist_out) // BBoF GE has a gist vector waiting to be picked up 01011 save_histogram(ge->getGist(), histograms_file(), 01012 image_name(), input_frame_series->frame(), 01013 segment_number()) ; 01014 if (event_queue->evolve() != SIM_CONTINUE) 01015 break ; 01016 } 01017 catch (lfatal_exception&) // if we seek beyond end of frame series 01018 { 01019 return ; // prevent LFATAL induced abortion 01020 } 01021 } 01022 } 01023 01024 // This function appends a training image's histogram to the training 01025 // histograms database file under the supplied "entry" name. As we did in 01026 // the SIFT descriptors accumulation function, in order to minimize 01027 // possible inconsistencies in this database, we choose to open and close 01028 // the training histograms file with each invocation of this helper 01029 // rather than keep a persistent ostream object around that obviates the 01030 // need for repeated file open/close operations. 
01031 void save_histogram(const Histogram& histogram, const std::string& file_name, 01032 const std::string& image_name, int frame_number, 01033 const std::string& segment_number) 01034 { 01035 std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ; 01036 ofs << image_name << ':' << frame_number << ' ' 01037 << segment_number << ' ' ; 01038 for (int y = 0; y < histogram.getHeight(); ++y) // should be just one row 01039 for (int x = 0; x < histogram.getWidth(); ++x) // should be 4200 columns 01040 ofs << histogram.getVal(x, y) << ' ' ; 01041 ofs << '\n' ; 01042 } 01043 01044 } // end of local namespace encapsulating training histograms section 01045 01046 //--------------------- SVM CLASSIFIER GENERATION ----------------------- 01047 01048 namespace { 01049 01050 // Forward declarations 01051 void create_torch_dataset(const std::string&, const std::string&, 01052 const std::string&) ; 01053 Torch::SVMClassification* create_torch_classifier(const std::string&) ; 01054 std::string temp_file_name() ; 01055 01056 // The following method implements this program's "svm" action. 
01057 void BBoFSimulation::generate_svm_classifier() 01058 { 01059 GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ; 01060 01061 create_torch_dataset(histograms_file(), segment_number(), svm_temp_file()) ; 01062 Torch::SVMClassification* svm = create_torch_classifier(svm_temp_file()) ; 01063 svm->save(svm_classifier_file().c_str()) ; 01064 01065 delete svm ; 01066 unlink(svm_temp_file().c_str()) ; 01067 } 01068 01069 // Quick helper for reading and writing gist vectors from/to a file 01070 struct GistVector { 01071 std::vector<double> values ; 01072 GistVector() ; 01073 } ; 01074 01075 GistVector::GistVector() 01076 : values(GistEstimatorBeyondBoF::gist_vector_size()) 01077 {} 01078 01079 std::istream& operator>>(std::istream& is, GistVector& g) 01080 { 01081 for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i) 01082 if (is) 01083 is >> g.values[i] ; 01084 else 01085 throw std::runtime_error("missing gist vector data") ; 01086 return is ; 01087 } 01088 01089 std::ostream& operator<<(std::ostream& os, const GistVector& g) 01090 { 01091 for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i) 01092 os << g.values[i] << ' ' ; 01093 return os ; 01094 } 01095 01096 // The torch library needs its datasets in a particular format. 01097 // Unfortunately, this program works with some other format. The 01098 // following function reads the histograms file saved by an earlier run 01099 // of this program and creates a corresponding torch dataset for 01100 // subsequent training of an SVM classifier for the specified target 01101 // segment. 
01102 void create_torch_dataset(const std::string& hist_file, 01103 const std::string& target, 01104 const std::string& torch_dataset) 01105 { 01106 const int n = count_lines(hist_file) ; 01107 01108 std::ifstream in(hist_file.c_str()) ; 01109 std::ofstream out(torch_dataset.c_str()) ; 01110 01111 std::string dummy, segment, str ; 01112 GistVector gist_vector ; 01113 out << n << ' ' << (GistEstimatorBeyondBoF::gist_vector_size() + 1) << '\n'; 01114 for (int i = 0; i < n; ++i) 01115 { 01116 std::getline(in, str) ; 01117 if (! in || str.empty()) { 01118 if (i == n - 1) // okay; all training histograms read successfully 01119 break ; 01120 else { 01121 out.close() ; 01122 unlink(torch_dataset.c_str()) ; 01123 throw std::runtime_error(hist_file + 01124 ": missing data or other read error") ; 01125 } 01126 } 01127 std::istringstream line(str) ; 01128 line >> dummy >> segment >> gist_vector ; 01129 out << gist_vector << ' ' << ((segment == target) ? +1 : -1) << '\n' ; 01130 } 01131 } 01132 01133 // The histogram intersection kernel for matching gist vectors of 01134 // different images. 01135 class HistIntKernel : public Torch::Kernel { 01136 real eval(Torch::Sequence*, Torch::Sequence*) ; 01137 } ; 01138 01139 real HistIntKernel::eval(Torch::Sequence* a, Torch::Sequence* b) 01140 { 01141 real sum = 0 ; 01142 for (int i = 0; i < a->frame_size; ++i) 01143 sum += min(a->frames[0][i], b->frames[0][i]) ; 01144 return sum ; 01145 } 01146 01147 // The following function creates an SVM classifier using the histogram 01148 // intersection kernel defined above. 
01149 Torch::SVMClassification* create_torch_classifier(const std::string& dataset) 01150 { 01151 HistIntKernel kernel ; 01152 Torch::SVMClassification* svm = new Torch::SVMClassification(& kernel) ; 01153 Torch::QCTrainer trainer(svm) ; 01154 Torch::MatDataSet data(dataset.c_str(), 01155 GistEstimatorBeyondBoF::gist_vector_size(), 1) ; 01156 trainer.train(& data, 0) ; 01157 return svm ; 01158 } 01159 01160 } // end of local namespace encapsulating SVM classifier generation section 01161 01162 //----------------------- IMAGE CLASSIFICATION -------------------------- 01163 01164 namespace { 01165 01166 // Useful typedefs 01167 typedef std::vector<Torch::SVMClassification*> Classifiers ; 01168 01169 // Forward declarations 01170 Classifiers load_classifiers(std::string, HistIntKernel*) ; 01171 Histogram read_gist_vector(std::istream&) ; 01172 void classify_image(const Histogram&, const Classifiers&, 01173 const std::string&, int, const std::string&, 01174 const std::string&) ; 01175 void nuke_classifiers(Classifiers&) ; 01176 01177 // The following method implements this program's "classify" action. It 01178 // reads the SIFT descriptors vocabulary and computes gist vectors for 01179 // input images using the BBoF gist estimator. Then, it uses the SVM 01180 // classifiers generated by the "svm" action to decide which category the 01181 // input image belongs to. 
01182 void BBoFSimulation::classify_input_images() 01183 { 01184 ModelManagerStarter M(model_manager) ; 01185 01186 nub::soft_ref<GistEstimatorBeyondBoF> ge = 01187 dynCastWeak<GistEstimatorBeyondBoF>( 01188 model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ; 01189 if (ge.isInvalid()) 01190 throw std::runtime_error("can only use GistEstimatorBeyondBoF") ; 01191 01192 Vocabulary V = load_vocabulary(sift_vocabulary_file()) ; 01193 ge->setVocabulary(V) ; 01194 LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s", 01195 V.getHeight(), sift_vocabulary_file().c_str()) ; 01196 01197 HistIntKernel kernel ; 01198 Classifiers svm_classifiers = 01199 load_classifiers(svm_classifier_file(), & kernel) ; 01200 01201 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 01202 for(;;) 01203 { 01204 try 01205 { 01206 SeC<SimEventGistOutput> gist_out = 01207 event_queue->check<SimEventGistOutput>(brain.get(), 01208 SEQ_UNMARKED | SEQ_MARK, 01209 ge.get()) ; 01210 if (gist_out) // BBoF GE has a gist vector waiting to be picked up 01211 classify_image(ge->getGist(), svm_classifiers, 01212 image_name(), input_frame_series->frame(), 01213 segment_number(), results_file()) ; 01214 if (event_queue->evolve() != SIM_CONTINUE) 01215 break ; 01216 } 01217 catch (lfatal_exception&) // if we seek beyond end of frame series 01218 { 01219 nuke_classifiers(svm_classifiers) ; 01220 return ; // prevent LFATAL induced abortion 01221 } 01222 } 01223 01224 nuke_classifiers(svm_classifiers) ; 01225 } 01226 01227 // The following method implements this program's "classify_gv" action. 01228 // It reads the SIFT descriptors vocabulary and loads the SVM 01229 // classifiers. Then, it uses the classifiers and the precomputed gist 01230 // vectors to decide to which category the input images belong. 
01231 void BBoFSimulation::classify_using_gist_vectors() 01232 { 01233 GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ; 01234 01235 HistIntKernel kernel ; 01236 Classifiers svm_classifiers = 01237 load_classifiers(svm_classifier_file(), & kernel) ; 01238 01239 int line_number = 1 ; 01240 std::ifstream ifs(gist_vectors_file().c_str()) ; 01241 while (ifs) 01242 try 01243 { 01244 classify_image(read_gist_vector(ifs), svm_classifiers, 01245 image_name(), line_number++, 01246 segment_number(), results_file()) ; 01247 } 01248 catch (std::exception&) // ifs ran out of gist vector data 01249 { 01250 } 01251 01252 nuke_classifiers(svm_classifiers) ; 01253 } 01254 01255 // Given an input image's gist vector and the SVM classifiers for all the 01256 // categories, this function checks which categories the input image 01257 // belongs to and writes the results to the classification results file. 01258 void classify_image(const Histogram& gist_vector, 01259 const Classifiers& classifiers, 01260 const std::string& image_name, int frame_number, 01261 const std::string& ground_truth, 01262 const std::string& results_file) 01263 { 01264 std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ; 01265 ofs << image_name << ':' << frame_number << ' ' << ground_truth << ' ' ; 01266 01267 Torch::Sequence gv(1, GistEstimatorBeyondBoF::gist_vector_size()) ; 01268 std::copy(gist_vector.begin(), gist_vector.end(), gv.frames[0]) ; 01269 01270 int n = 0 ; // num categories into which input image can be classified 01271 const int N = classifiers.size() ; 01272 for (int i = 0; i < N; ++i) { 01273 classifiers[i]->forward(& gv) ; 01274 if (classifiers[i]->outputs->frames[0][0] > 0) { 01275 ofs << (i+1) << ' ' ; 01276 ++n ; 01277 } 01278 } 01279 01280 if (! n) // input image could not be classified into any category 01281 ofs << '0' ; 01282 ofs << '\n' ; 01283 } 01284 01285 // This function loads all the SVM classifiers beginning with the 01286 // specified "root" name. 
Here's how this is supposed to work: 01287 // 01288 // Let's say we 9 categories. Earlier runs of this program ought to have 01289 // created 9 SVM classifiers. Usually, these would be named 01290 // "XXX_svm_classifier.1", "XXX_svm_classifier.2", "XXX_svm_classifier.3" 01291 // and so on. This function will read each of these files back into 01292 // memory using the torch library (which is what created those files in 01293 // the first place). 01294 // 01295 // The kernel for each of these SVM classifiers is the histogram 01296 // intersection kernel as described in the Lazebnik paper. 01297 Classifiers 01298 load_classifiers(std::string classifiers_root_name, HistIntKernel* kernel) 01299 { 01300 classifiers_root_name += ".*" ; 01301 glob_t buf ; 01302 if (glob(classifiers_root_name.c_str(), 0, 0, & buf) != 0) 01303 throw std::runtime_error("couldn't find/load the SVM classifiers") ; 01304 01305 const int N = buf.gl_pathc ; 01306 Classifiers classifiers(N) ; 01307 for (int i = 0; i < N; ++i) { 01308 classifiers[i] = new Torch::SVMClassification(kernel) ; 01309 classifiers[i]->load(buf.gl_pathv[i]) ; 01310 } 01311 01312 globfree(& buf) ; 01313 return classifiers ; 01314 } 01315 01316 // Delete all SVM classifier objects created in previous function 01317 void nuke_classifiers(Classifiers& C) 01318 { 01319 const int N = C.size() ; 01320 for (int i = 0; i < N; ++i) 01321 delete C[i] ; 01322 } 01323 01324 // This function reads a gist vector from the specified input stream 01325 Histogram read_gist_vector(std::istream& is) 01326 { 01327 GistVector G ; 01328 is >> G ; 01329 01330 Histogram H(GistEstimatorBeyondBoF::gist_vector_size(), 1, NO_INIT) ; 01331 std::copy(G.values.begin(), G.values.end(), H.beginw()) ; 01332 return H ; 01333 } 01334 01335 } // end of local namespace encapsulating image classification section 01336 01337 //-------------------------- UTILITY ROUTINES --------------------------- 01338 01339 namespace { 01340 01341 // Count the number of lines in 
a file (wc -l) 01342 int count_lines(const std::string& file_name) 01343 { 01344 int n = -1 ; // because EOF is read after final \n (1 extra iter. of loop) 01345 std::ifstream ifs(file_name.c_str()) ; 01346 01347 std::string dummy ; 01348 while (ifs) { 01349 getline(ifs, dummy) ; 01350 ++n ; 01351 } 01352 return n ; 01353 } 01354 01355 // Returns true if a floating point number is near zero 01356 bool is_zero(double d) 01357 { 01358 return std::fabs(d) <= std::numeric_limits<double>::epsilon() ; 01359 } 01360 01361 } // end of local namespace encapsulating utility routines section 01362 01363 //----------------------------------------------------------------------- 01364 01365 #endif // #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_TORCH) 01366 01367 /* So things look consistent in everyone's emacs... */ 01368 /* Local Variables: */ 01369 /* indent-tabs-mode: nil */ 01370 /* End: */