train-bbof.C

Go to the documentation of this file.
00001 /**
00002    \file Gist/train-bbof.C
00003 
00004    \brief Interface for training and testing GistEstimatorBeyondBoF.
00005 
00006    The train-bbof program in conjunction with the GistEstimatorBeyondBoF
00007    class implements the following paper within the INVT framework:
00008 
00009    Lazebnik, S., Schmid, C., Ponce, J.
00010    Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
00011       Natural Scene Categories
00012    CVPR, 2006.
00013 
00014    Whereas the GistEstimatorBeyondBoF class is only concerned with the
00015    portions of the above paper that deal with gist vector computations,
00016    this program provides the remaining structure required to implement
00017    the necessary training and image classification functionalities.
00018 
00019    train-bbof has two modes of operation, viz., training and testing.
00020    Training mode consists of four distinct phases: SIFT descriptor
00021    accumulation, K-means clustering, training histograms collection, and
00022    SVM generation. Testing mode operates in a single phase that uses the
00023    results of the clustering, histograms collection and SVM generation
00024    training phases to classify input images into appropriate categories.
00025 */
00026 
00027 // //////////////////////////////////////////////////////////////////// //
00028 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00029 // University of Southern California (USC) and the iLab at USC.         //
00030 // See http://iLab.usc.edu for information about this project.          //
00031 // //////////////////////////////////////////////////////////////////// //
00032 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00033 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00034 // in Visual Environments, and Applications'' by Christof Koch and      //
00035 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00036 // pending; application number 09/912,225 filed July 23, 2001; see      //
00037 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00038 // //////////////////////////////////////////////////////////////////// //
00039 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00040 //                                                                      //
00041 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00042 // redistribute it and/or modify it under the terms of the GNU General  //
00043 // Public License as published by the Free Software Foundation; either  //
00044 // version 2 of the License, or (at your option) any later version.     //
00045 //                                                                      //
00046 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00047 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00048 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00049 // PURPOSE.  See the GNU General Public License for more details.       //
00050 //                                                                      //
00051 // You should have received a copy of the GNU General Public License    //
00052 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00053 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00054 // Boston, MA 02111-1307 USA.                                           //
00055 // //////////////////////////////////////////////////////////////////// //
00056 //
00057 // Primary maintainer for this file: Manu Viswanathan <mviswana at usc dot edu>
00058 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-bbof.C $
00059 // $Id: train-bbof.C 14605 2011-03-15 02:25:06Z dparks $
00060 //
00061 
00062 //--------------------------- LIBRARY CHECK -----------------------------
00063 
00064 #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_LIBTORCH)
00065 
00066 #include "Util/log.H"
00067 
00068 int main()
00069 {
00070    LERROR("Sorry, this program needs the OpenCV and torch libraries.") ;
00071    return 255 ;
00072 }
00073 
00074 #else // the actual program in all its hideous glory
00075 
00076 //------------------------------ HEADERS --------------------------------
00077 
00078 #include "Image/OpenCVUtil.H"  // must be first to avoid conflicting defs of int64, uint64
00079 
00080 #include <fstream>
00081 
00082 // Gist specific headers
00083 #include "Neuro/GistEstimatorBeyondBoF.H"
00084 
00085 // Other INVT headers
00086 #include "Neuro/StdBrain.H"
00087 #include "Neuro/NeuroOpts.H"
00088 #include "Neuro/NeuroSimEvents.H"
00089 
00090 #include "Media/SimFrameSeries.H"
00091 #include "Media/MediaOpts.H"
00092 
00093 #include "Simulation/SimEventQueue.H"
00094 #include "Simulation/SimEventQueueConfigurator.H"
00095 
00096 #include "Channels/ChannelOpts.H"
00097 #include "Component/ModelManager.H"
00098 #include "Component/ModelOptionDef.H"
00099 
00100 #include "Image/Point2D.H"
00101 
00102 #include "nub/ref.h"
00103 
00104 // torch headers
00105 #include <torch/general.h>
00106 #include <torch/QCTrainer.h>
00107 #include <torch/SVMClassification.h>
00108 #include <torch/Kernel.h>
00109 #include <torch/MatDataSet.h>
00110 
00111 // Unix headers
00112 #include <glob.h>
00113 #include <unistd.h>
00114 
00115 // Standard C++ headers
00116 #include <sstream>
00117 #include <ios>
00118 #include <numeric>
00119 #include <algorithm>
00120 #include <functional>
00121 #include <map>
00122 #include <vector>
00123 #include <iterator>
00124 #include <stdexcept>
00125 #include <utility>
00126 #include <limits>
00127 #include <cmath>
00128 
00129 //------------------------ TEMPLATE UTILITIES ---------------------------
00130 
// Handy (though not necessarily the fastest) way of turning a value of
// just about any type into its textual representation.
//
// DEVNOTE: The only requirement on type T is that it supply an
// operator << capable of writing to an ostream.
template<typename T>
static std::string to_string(const T& t)
{
   std::ostringstream formatter ;
   formatter << t ;
   return formatter.str() ;
}
00143 
/// Read a value of type T from a string. Works as long as type T
/// defines an operator >> that reads from an istream.
///
/// \param s       the text to be parsed
/// \param defval  value returned when nothing of type T can be extracted
///                from s (defaults to a value-initialized T)
/// \return the parsed value or, on extraction failure, defval
///
/// DEVNOTE: We have to check the stream state explicitly. Since C++11 a
/// failed numeric extraction stores zero in the target, so relying on
/// the target still holding its initial value would silently discard
/// the caller's default.
template<typename T>
static T from_string(const std::string& s, const T& defval = T())
{
   T parsed(defval) ;
   std::istringstream str(s) ;
   if (! (str >> parsed)) // extraction failed ==> fall back to default
      return defval ;
   return parsed ;
}

/// from_string() explicit (full) specialization for strings. If the
/// client wants a string from the input string, we just return the
/// input string unchanged. If we were to apply the default version of
/// this template function, we would end up parsing the input string as
/// a whitespace separated string stream and only return the first
/// string from this stream.
///
/// The second (default value) parameter is ignored.
template<>
std::string from_string(const std::string& s, const std::string&)
{
   return s ;
}
00165 
00166 //----------------------- COMMAND LINE OPTIONS --------------------------
00167 
00168 /**
00169    This program has six distinct phases/modes of operation, each one
00170    specified via a suitable non-option command line argument.
00171    Additionally, it supports several command line options to allow users
00172    to tweak various parameters such as the name of the vocabulary file,
00173    the training histograms database, and so on.
00174 */
00175 namespace {
00176 
00177 const ModelOptionCateg MOC_BBOF = {
00178    MOC_SORTPRI_3,
00179    "Options specific to the Beyond Bag-of-Features program",
00180 } ;
00181 
00182 /// In the SIFT descriptors accumulation phase, we collect all the
00183 /// descriptors from the training images and store them in a plain text
00184 /// file.
00185 #ifndef BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE
00186    #define BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE "sift_descriptors.txt"
00187 #endif
00188 
00189 const ModelOptionDef OPT_SiftDescriptors = {
00190    MODOPT_ARG_STRING, "SiftDescriptors", & MOC_BBOF, OPTEXP_CORE,
00191    "This option specifies the name of the file where SIFT descriptors\n"
00192    "for the training images are to be accumulated. This is a plain text\n"
00193    "file containing the descriptors that will be fed into the K-means\n"
00194    "procedure during the second training phase.\n",
00195    "sift-descriptors", '\0', "sift-descriptors-file",
00196    BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE,
00197 } ;
00198 
00199 /// In the second phase of training, we perform K-means clustering on the
00200 /// SIFT descriptors accumulated in the first phase and store the results
00201 /// in yet another plain text file.
00202 #ifndef BBOF_DEFAULT_VOCABULARY_FILE
00203    #define BBOF_DEFAULT_VOCABULARY_FILE "sift_vocabulary.txt"
00204 #endif
00205 
00206 const ModelOptionDef OPT_SiftVocabulary = {
00207    MODOPT_ARG_STRING, "SiftVocabulary", & MOC_BBOF, OPTEXP_CORE,
00208    "This option specifies the name of the file in which the \"prototypical\"\n"
00209    "SIFT descriptors are (or are to be) stored. This is a plain text\n"
00210    "file containing the centroids of the K-means clusters, which are used\n"
00211    "during gist vector computation to create feature maps and, subsequently,\n"
00212    "the multi-level histograms using the spatial matching pyramid as\n"
00213    "described in the Lazebnik paper.\n",
00214    "sift-vocabulary", '\0', "sift-vocabulary-file",
00215    BBOF_DEFAULT_VOCABULARY_FILE,
00216 } ;
00217 
00218 /// In the third phase of training, we compute and store the gist vectors
00219 /// for the training images. These gist vectors are used in the next
00220 /// training phase as the data points that will be used to create
00221 /// appropriate SVM classifiers for each image category.
00222 #ifndef BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE
00223    #define BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE "training_histograms.txt"
00224 #endif
00225 
00226 const ModelOptionDef OPT_HistogramsFile = {
00227    MODOPT_ARG_STRING, "HistogramsFile", & MOC_BBOF, OPTEXP_CORE,
00228    "This option specifies the name of the training histograms database,\n"
00229    "a plain text file containing one histogram entry per line. The\n"
00230    "first field specifies the name plus number of the entry (e.g.,\n"
00231    "foo.mpg:1, bar.mpg:5, and so on). The second field specifies the ground\n"
00232    "truth for this particular image. The remaining fields are simply the\n"
00233    "4200 numbers making up the image's flattened out multi-level histogram,\n"
00234    "which serves as its gist vector.\n",
00235    "training-histograms", '\0', "training-histograms-file",
00236    BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE,
00237 } ;
00238 
00239 /// In the fourth phase of training, we create SVM classifiers for each
00240 /// of the categories and store the relevant parameters to a text file
00241 /// for later use during image classification. Each segment will have its
00242 /// own SVM classifier. Therefore, the default value of this symbol is
00243 /// not a good one to use and it should be explicitly specified on the
00244 /// command line.
00245 #ifndef BBOF_DEFAULT_SVM_CLASSIFIER_FILE
00246    #define BBOF_DEFAULT_SVM_CLASSIFIER_FILE "svm_classifier.txt"
00247 #endif
00248 
00249 const ModelOptionDef OPT_SvmClassifierFile = {
00250    MODOPT_ARG_STRING, "SvmClassifierFile", & MOC_BBOF, OPTEXP_CORE,
00251    "This option specifies the name of the file that will hold the SVM\n"
00252    "classifier for a given segment. This file is read and written by the\n"
00253    "torch library.",
00254    "svm-classifier", '\0', "svm-classifier-file",
00255    BBOF_DEFAULT_SVM_CLASSIFIER_FILE,
00256 } ;
00257 
00258 /// While creating SVM classifiers for each of the categories, we need a
00259 /// temp file to store the training histograms data in the format
00260 /// required by the torch library. Usually, it would be a good idea to
00261 /// explicitly specify this on the command line rather than relying on
00262 /// the compiled in default.
00263 #ifndef BBOF_DEFAULT_SVM_TEMP_FILE
00264    #define BBOF_DEFAULT_SVM_TEMP_FILE "/tmp/train-bbof-torch-dataset.txt"
00265 #endif
00266 
00267 const ModelOptionDef OPT_SvmTempFile = {
00268    MODOPT_ARG_STRING, "SvmTempFile", & MOC_BBOF, OPTEXP_CORE,
00269    "This option specifies the name of the temp file that will hold the SVM\n"
00270    "training data in the format required by the torch library. This file is\n"
00271    "is automatically deleted when it is no longer required.",
00272    "svm-temp", '\0', "svm-temp-file",
00273    BBOF_DEFAULT_SVM_TEMP_FILE,
00274 } ;
00275 
00276 /// In image classification mode, we write the results to a plain text
00277 /// file.
00278 #ifndef BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE
00279    #define BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE "bbof_classifications.txt"
00280 #endif
00281 
00282 const ModelOptionDef OPT_ResultsFile = {
00283    MODOPT_ARG_STRING, "ResultsFile", & MOC_BBOF, OPTEXP_CORE,
00284    "This option specifies the name of the classification results file,\n"
00285    "a plain text file containing one result entry per line. The first\n"
00286    "field specifies the name of the input image plus number of the entry,\n"
00287    "(e.g., foo.mpg:1, bar.mpg:5, and so on). Then comes the ground truth\n"
00288    "for this image followed by its classification result.\n",
00289    "results-file", '\0', "classification-results-file",
00290    BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE,
00291 } ;
00292 
00293 /// Several of the data files output by different operational modes of
00294 /// this program require inclusion of the current image/frame name and
00295 /// number and the ground truth segment/category number. These options
00296 /// allow users to specify appropriate values for this required info.
00297 ///
00298 /// NOTE: The default values for these options are not very useful.
00299 /// They really ought to be explicitly specified on the command line.
00300 #ifndef BBOF_DEFAULT_IMAGE_NAME
00301    #define BBOF_DEFAULT_IMAGE_NAME "some_image"
00302 #endif
00303 #ifndef BBOF_DEFAULT_SEGMENT_NUMBER
00304    #define BBOF_DEFAULT_SEGMENT_NUMBER "0"
00305 #endif
00306 
00307 const ModelOptionDef OPT_ImageName = {
00308    MODOPT_ARG_STRING, "ImageName", & MOC_BBOF, OPTEXP_CORE,
00309    "This option specifies the \"root\" name for an image. The image number\n"
00310    "will be automatically appended to this \"root\" name with a colon as the\n"
00311    "separator between name and frame number. The current input MPEG file\n"
00312    "name is a good choice for the value of this option.\n",
00313    "image-name", '\0', "input-MPEG-file-name",
00314    BBOF_DEFAULT_IMAGE_NAME,
00315 } ;
00316 
00317 const ModelOptionDef OPT_SegmentNumber = {
00318    MODOPT_ARG_STRING, "SegmentNumber", & MOC_BBOF, OPTEXP_CORE,
00319    "This option specifies the segment number for an image in the training\n"
00320    "set. The segment number is used to specify the ground truth for the\n"
00321    "image classification.\n",
00322    "segment-number", '\0', "image-segment-number",
00323    BBOF_DEFAULT_SEGMENT_NUMBER,
00324 } ;
00325 
00326 /// classification can be performed either with input images or with
00327 /// precomputed gist vectors. These precomputed vectors are stored in a
00328 /// plain text file. This is the default name of that file.
00329 #ifndef BBOF_DEFAULT_GIST_VECTORS_FILE
00330    #define BBOF_DEFAULT_GIST_VECTORS_FILE "gist_vectors.txt"
00331 #endif
00332 
00333 const ModelOptionDef OPT_GistVectors = {
00334    MODOPT_ARG_STRING, "GistVectors", & MOC_BBOF, OPTEXP_CORE,
00335    "This option specifies the name of the file in which gist vectors are\n"
00336    "stored. This option is used when classification must be performed\n"
00337    "using gist vectors computed by some other entity. This is useful, for\n"
00338    "example, when a client program running on some other host computes gist\n"
00339    "vectors and passes these vectors to this program for classification.\n"
00340    "In such situations, we do not need to read images and compute gist\n"
00341    "vectors for them. Instead, we bypass all that and simply perform the\n"
00342    "classification using the precomputed vectors.\n",
00343    "gist-vectors", '\0', "gist-vectors-file",
00344    BBOF_DEFAULT_GIST_VECTORS_FILE,
00345 } ;
00346 
00347 /// The vocabulary consists of a bunch of "prototypical" SIFT descriptors
00348 /// that are obtained by clustering the SIFT descriptors for the training
00349 /// images. It is possible to change the size of the vocabulary. But the
00350 /// default is 200.
00351 #ifndef BBOF_DEFAULT_VOCABULARY_SIZE
00352    #define BBOF_DEFAULT_VOCABULARY_SIZE "200"
00353 #endif
00354 
00355 const ModelOptionDef OPT_VocabularySize = {
00356    MODOPT_ARG_STRING, "VocabularySize", & MOC_BBOF, OPTEXP_CORE,
00357    "This option specifies the size of the SIFT vocabulary.\n",
00358    "vocabulary-size", '\0', "vocabulary-size",
00359    BBOF_DEFAULT_VOCABULARY_SIZE,
00360 } ;
00361 
00362 /**
00363    The different operational modes of this program must be specified as
00364    the one and only non-option command line argument. This "action"
00365    command must be one of the following strings (case-sensitive!):
00366 
00367    1. sift -- accumulate the SIFT descriptors for the training images
00368       in the plain text file specified by the --sift-descriptors option.
00369       By default, the descriptors will be accumulated in
00370       ./sift_descriptors.txt.
00371 
00372       Additionally, the --image-name and --segment-number options are
00373       required as this information is also recorded in the SIFT
00374       descriptors file.
00375 
00376    2. vocab -- compute the SIFT descriptors vocabulary, i.e., the
00377       "prototypical" SIFT descriptors, from the accumulated
00378       descriptors using the OpenCV K-means implementation.
00379 
00380       For this action, the --sift-descriptors option specifies the input
00381       file for the K-means while the --sift-vocabulary option specifies
00382       the output file. The defaults are to read from
00383       ./sift_descriptors.txt and write to ./sift_vocabulary.txt.
00384 
00385    3. hist -- compute the flattened out multi-level histograms for the
00386       training set. The output is sent to the text file specified by the
00387       --training-histograms option.
00388 
00389       The --image-name and --segment-number options are also required.
00390 
00391    4. svm -- generate the SVM classifiers for each of the categories.
00392       The --svm-classifier file specifies the name of the file to which
00393       the SVM parameters will be stored. By default, this is
00394       ./svm_classifier.txt. Users should supply a file name different
00395       from the default. Otherwise, this file will get overwritten for
00396       each segment.
00397 
00398       The --training-histograms option can be used to specify the input data for
00399       this action.
00400 
00401       In addition to the above two options, this action also needs the
00402       --svm-temp option to store the histograms data in the format
00403       required by the torch library. The default value is okay for this
00404       option. However, if several instances of this program can be
00405       executing in parallel, it would be best to supply different temp
00406       files explicitly on the command line.
00407 
00408    5. classify -- uses the vocabulary and SVM classifiers produced by the
00409       vocab and svm actions to classify the input images streaming in.
00410       Classification results are written, by default, to
00411       ./bbof_classifications.txt; but this can be changed with the
00412       --results-file option.
00413 
00414       The --sift-vocabulary and --svm-classifier options can be used to
00415       specify appropriate values for the different pieces of input
00416       required by the classify action. Note that the --svm-classifier
00417       option does not point to a specific classifier, but really is a
00418       "root" name to use. This program will automatically load all the
00419       classifiers that begin with this root. For example, if the user
00420       specifies --svm-classifier="ACB_svm_classifier", this program will
00421       load all the classifiers whose file names begin with
00422       "ACB_svm_classifier." and append numbers starting at 1.
00423 
00424    6. classify_gv -- same action as classify except that input images are
00425       not used; instead, we assume that some other entity (e.g., a client
00426       application running on a phone) has computed the gist vector for
00427       one or more images and is passing these vectors via the
00428       --gist-vectors option.
00429 */
// The "action" strings accepted as the program's one and only
// non-option command line argument. Each may be overridden at build
// time by predefining the corresponding symbol.
#ifndef BBOF_SIFT_CMD
   #define BBOF_SIFT_CMD "sift"
#endif
#ifndef BBOF_VOCABULARY_CMD
   #define BBOF_VOCABULARY_CMD "vocab"
#endif
#ifndef BBOF_HISTOGRAM_CMD
   #define BBOF_HISTOGRAM_CMD "hist"
#endif
#ifndef BBOF_SVM_CMD
   #define BBOF_SVM_CMD "svm"
#endif
#ifndef BBOF_CLASSIFY_CMD
   #define BBOF_CLASSIFY_CMD "classify"
#endif
#ifndef BBOF_CLASSIFY_GV_CMD
   #define BBOF_CLASSIFY_GV_CMD "classify_gv"
#endif

// For printing usage info: all the actions glued together as
// "{sift|vocab|...}" by adjacent string literal concatenation.
//
// DEVNOTE: The whitespace between each string literal and the macro
// name next to it is required. Since C++11, an identifier immediately
// following a string literal is lexed as a user-defined literal suffix,
// which makes "{"BBOF_SIFT_CMD ill-formed (or, at best, a warning).
#ifndef BBOF_ACTIONS
   #define BBOF_ACTIONS ("{" BBOF_SIFT_CMD "|" BBOF_VOCABULARY_CMD "|" \
                          BBOF_HISTOGRAM_CMD "|" BBOF_SVM_CMD "|" \
                          BBOF_CLASSIFY_CMD "|" BBOF_CLASSIFY_GV_CMD "}")
#endif
00455 
00456 } // end of local namespace encapsulating command line options section
00457 
00458 //--------------------- SIMULATION ENCAPSULATION ------------------------
00459 
00460 // The following helper class wraps around the ModelManager and
00461 // associated objects, providing a neatly encapsulated API for the main
00462 // program.
00463 namespace {
00464 
00465 class BBoFSimulation {
00466    ModelManager model_manager ;
00467    nub::soft_ref<SimEventQueueConfigurator> configurator ;
00468    nub::soft_ref<StdBrain> brain ;
00469    nub::ref<SimInputFrameSeries> input_frame_series ;
00470 
00471    // Various command line options specific to this program
00472    OModelParam<std::string> sd_option ; // --sift-descriptors
00473    OModelParam<std::string> sv_option ; // --sift-vocabulary
00474    OModelParam<std::string> th_option ; // --training-histograms
00475    OModelParam<std::string> sc_option ; // --svm-classifier
00476    OModelParam<std::string> st_option ; // --svm-temp
00477    OModelParam<std::string> rf_option ; // --results-file
00478    OModelParam<std::string> in_option ; // --image-name (not --in!)
00479    OModelParam<std::string> sn_option ; // --segment-number
00480    OModelParam<std::string> gv_option ; // --gist-vectors
00481    OModelParam<std::string> vs_option ; // --vocabulary-size
00482 
00483 public :
00484    BBoFSimulation(const std::string& model_name) ;
00485    void parse_command_line(int argc, const char* argv[]) ;
00486    void run() ;
00487    ~BBoFSimulation() ;
00488 
00489 private :
00490    // The different actions performed by this program
00491    typedef void (BBoFSimulation::*Action)() ;
00492    typedef std::map<std::string, Action> ActionMap ;
00493    ActionMap action_map ;
00494 
00495    void accumulate_sift_descriptors() ;
00496    void compute_sift_vocabulary() ;
00497    void compute_training_histograms() ;
00498    void generate_svm_classifier() ;
00499    void classify_input_images() ;
00500    void classify_using_gist_vectors() ;
00501 
00502    // Accessors for retrieving some of the command line arguments
00503    std::string sift_descriptors_file() {return sd_option.getVal() ;}
00504    std::string sift_vocabulary_file()  {return sv_option.getVal() ;}
00505    std::string histograms_file()       {return th_option.getVal() ;}
00506    std::string svm_classifier_file()   {return sc_option.getVal() ;}
00507    std::string svm_temp_file()         {return st_option.getVal() ;}
00508    std::string results_file()          {return rf_option.getVal() ;}
00509    std::string image_name()            {return in_option.getVal() ;}
00510    std::string segment_number()        {return sn_option.getVal() ;}
00511    std::string gist_vectors_file()     {return gv_option.getVal() ;}
00512    int vocabulary_size() {return from_string<int>(vs_option.getVal()) ;}
00513 } ;
00514 
00515 // On instantiation, create the model manager and the simulation's
00516 // various components.
00517 BBoFSimulation::BBoFSimulation(const std::string& model_name)
00518    : model_manager(model_name),
00519      configurator(new SimEventQueueConfigurator(model_manager)),
00520      brain(new StdBrain(model_manager)),
00521      input_frame_series(new SimInputFrameSeries(model_manager)),
00522      sd_option(& OPT_SiftDescriptors,   & model_manager),
00523      sv_option(& OPT_SiftVocabulary,    & model_manager),
00524      th_option(& OPT_HistogramsFile,    & model_manager),
00525      sc_option(& OPT_SvmClassifierFile, & model_manager),
00526      st_option(& OPT_SvmTempFile,       & model_manager),
00527      rf_option(& OPT_ResultsFile,       & model_manager),
00528      in_option(& OPT_ImageName,         & model_manager),
00529      sn_option(& OPT_SegmentNumber,     & model_manager),
00530      gv_option(& OPT_GistVectors,       & model_manager),
00531      vs_option(& OPT_VocabularySize,    & model_manager)
00532 {
00533    model_manager.addSubComponent(configurator) ;
00534    model_manager.addSubComponent(brain) ;
00535    model_manager.addSubComponent(input_frame_series) ;
00536 
00537    typedef BBoFSimulation me ; // typing shortcut
00538    action_map[BBOF_SIFT_CMD]        = & me::accumulate_sift_descriptors ;
00539    action_map[BBOF_VOCABULARY_CMD]  = & me::compute_sift_vocabulary ;
00540    action_map[BBOF_HISTOGRAM_CMD]   = & me::compute_training_histograms ;
00541    action_map[BBOF_SVM_CMD]         = & me::generate_svm_classifier ;
00542    action_map[BBOF_CLASSIFY_CMD]    = & me::classify_input_images ;
00543    action_map[BBOF_CLASSIFY_GV_CMD] = & me::classify_using_gist_vectors ;
00544 }
00545 
00546 void BBoFSimulation::parse_command_line(int argc, const char* argv[])
00547 {
00548    model_manager.setOptionValString(& OPT_GistEstimatorType, "BBoF") ;
00549 
00550    model_manager.setOptionValString(& OPT_SiftDescriptors,
00551                                     BBOF_DEFAULT_TRAINING_DESCRIPTORS_FILE) ;
00552    model_manager.setOptionValString(& OPT_SiftVocabulary,
00553                                     BBOF_DEFAULT_VOCABULARY_FILE) ;
00554    model_manager.setOptionValString(& OPT_HistogramsFile,
00555                                     BBOF_DEFAULT_TRAINING_HISTOGRAMS_FILE ) ;
00556    model_manager.setOptionValString(& OPT_SvmClassifierFile,
00557                                     BBOF_DEFAULT_SVM_CLASSIFIER_FILE ) ;
00558    model_manager.setOptionValString(& OPT_SvmTempFile,
00559                                     BBOF_DEFAULT_SVM_TEMP_FILE ) ;
00560    model_manager.setOptionValString(& OPT_ResultsFile,
00561                                     BBOF_DEFAULT_CLASSIFICATION_RESULTS_FILE) ;
00562 
00563    model_manager.setOptionValString(& OPT_ImageName,
00564                                     BBOF_DEFAULT_IMAGE_NAME) ;
00565    model_manager.setOptionValString(& OPT_SegmentNumber,
00566                                     BBOF_DEFAULT_SEGMENT_NUMBER) ;
00567 
00568    model_manager.setOptionValString(& OPT_GistVectors,
00569                                     BBOF_DEFAULT_GIST_VECTORS_FILE) ;
00570 
00571    model_manager.setOptionValString(& OPT_VocabularySize,
00572                                     BBOF_DEFAULT_VOCABULARY_SIZE) ;
00573 
00574    if (! model_manager.parseCommandLine(argc, argv, BBOF_ACTIONS, 1, 1))
00575       throw std::runtime_error("command line parse error") ;
00576 }
00577 
00578 // To run the simulation, we simply dispatch to the function
00579 // corresponding to the action (non-option) command line argument.
00580 void BBoFSimulation::run()
00581 {
00582    std::string cmd(model_manager.getExtraArg(0)) ;
00583    ActionMap::iterator action = action_map.find(cmd) ;
00584    if (action == action_map.end())
00585       throw std::runtime_error(cmd + ": sorry, unknown action") ;
00586    (this->*(action->second))() ;
00587 }
00588 
00589 // Do we really not have to delete the configurator, brain and input
00590 // frame series? If it turns out we do, this empty destructor will have
00591 // to be filled out with the necessary delete calls...
00592 BBoFSimulation::~BBoFSimulation(){}
00593 
00594 // Quick helper class to start and stop model manager (useful when
00595 // exceptions are thrown because destructor automatically stops the model
00596 // manager without requiring an explicit call to the stop method prior to
00597 // throwing the exception).
00598 class ModelManagerStarter {
00599    ModelManager& mgr ;
00600 public :
00601    ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;}
00602    ~ModelManagerStarter() {mgr.stop() ;}
00603 } ;
00604 
00605 } // end of local namespace encapsulating simulation encapsulation section
00606 
00607 //------------------------------- MAIN ----------------------------------
00608 
00609 int main(int argc, const char* argv[])
00610 {
00611    MYLOGVERB = LOG_INFO ; // suppress debug messages
00612    try
00613    {
00614       BBoFSimulation S("train-bbof Model") ;
00615       S.parse_command_line(argc, argv) ;
00616       S.run() ;
00617    }
00618    catch (std::exception& e)
00619    {
00620      LFATAL("%s", e.what()) ;
00621       return 1 ;
00622    }
00623    return 0 ;
00624 }
00625 
00626 //------------------- SIFT DESCRIPTORS ACCUMULATION ---------------------
00627 
00628 // This section contains the code for accumulating the SIFT descriptors
00629 // of the training images, i.e., phase one of training.
00630 namespace {
00631 
00632 // Useful shortcut
00633 typedef GistEstimatorBeyondBoF::SiftGrid SiftGrid ;
00634 
00635 // Quick helper for storing the SIFT descriptors of the training images
00636 // to a file.
00637 class sift_descriptors_accumulator {
00638    sift_descriptors_accumulator() ; // private to disallow instantiation
00639    ~sift_descriptors_accumulator() ;
00640 public :
00641    static std::string output_file ;
00642    static std::string image_name ;
00643    static int         frame_number ;
00644    static std::string segment_number ;
00645 
00646    static void write(const SiftGrid&) ;
00647 } ;
00648 
00649 // This method implements the simulation's main loop for the "sift"
00650 // action. Prior to starting the main loop though, it configures the
00651 // BBoF gist estimator's training callback, which is triggered at each
00652 // step of the brain's evolution. The BBoF gist estimator passes the
00653 // SIFT descriptors for the current input image to this callback, which
00654 // then proceeds to accumulate them in the file specified by the
00655 // --sift-descriptors option.
00656 void BBoFSimulation::accumulate_sift_descriptors()
00657 {
00658    ModelManagerStarter M(model_manager) ;
00659 
00660    nub::soft_ref<GistEstimatorBeyondBoF> ge =
00661       dynCastWeak<GistEstimatorBeyondBoF>(
00662          model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
00663    if (ge.isInvalid())
00664       throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
00665 
00666    typedef sift_descriptors_accumulator acc ;
00667    acc::output_file    = sift_descriptors_file() ;
00668    acc::image_name     = image_name() ;
00669    acc::segment_number = segment_number() ;
00670    ge->setTrainingHook(acc::write) ;
00671 
00672    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
00673    for(;;)
00674    {
00675       try
00676       {
00677          acc::frame_number = input_frame_series->frame() ;
00678          if (event_queue->evolve() != SIM_CONTINUE)
00679             break ;
00680       }
00681       catch (lfatal_exception&) // if we seek beyond end of frame series
00682       {
00683          return ; // prevent LFATAL induced abortion
00684       }
00685    }
00686 }
00687 
00688 // Static data members for storing the SIFT descriptors file name and
00689 // other pertinent info persistently across multiple invocations of the
00690 // GistEstimatorBeyondBoF's training hook.
00691 std::string sift_descriptors_accumulator::output_file ;
00692 std::string sift_descriptors_accumulator::image_name ;
00693 int         sift_descriptors_accumulator::frame_number ;
00694 std::string sift_descriptors_accumulator::segment_number ;
00695 
00696 // The following function is meant to be used as the
00697 // GistEstimatorBeyondBoF training hook. It simply appends the SIFT
00698 // descriptors grid passed to it (stored as an
00699 // Image<GistEstimatorBeyondBoF::SiftDescriptor>) to the output file.
00700 // The format of this file is as shown below:
00701 //
00702 // MPEG-file-name:frame-number  segment-number  row  col  SIFT-descriptor
00703 //
00704 // The MPEG file name should be explicitly specified with the
00705 // --image-name option. The frame number is extracted automatically from
00706 // the input frame series. The segment number represents the ground
00707 // truth for the input image's category and should be specified
00708 // explicitly on the command line with the --segment-number option. The
00709 // row and col values are the SIFT grid coordinates. And, finally, the
00710 // SIFT descriptor itself consists of 128 numbers.
00711 //
00712 // DEVNOTE: We could open the output file once and use that object to
00713 // avoid reopening (by using a static ostream data member rather than a
00714 // static string). However, if the program were to somehow crash halfway
00715 // through, then the training SIFT descriptors output file would be in an
00716 // inconsistent state and rerunning the program can result in appending
00717 // data to a possibly inconsistent dataset, which would only make things
00718 // worse.
00719 //
00720 // Thus, we choose to open and close the output file each time the
00721 // GistEstimatorBeyondBoF training hook is triggered. (Of course, if the
00722 // program cashes while this function is executing, then all bets are
00723 // off; the training SIFT descriptors file's inconsistency will be
00724 // unavoidable in this case.)
00725 void sift_descriptors_accumulator::write(const SiftGrid& G)
00726 {
00727    if (output_file.empty())
00728       throw std::runtime_error("SIFT descriptors accumulator output file "
00729                                "not specified") ;
00730 
00731    std::ofstream ofs(output_file.c_str(), std::ios::out | std::ios::app) ;
00732    for (int y = 0; y < G.getHeight(); ++y)
00733       for (int x = 0; x < G.getWidth(); ++x)
00734          ofs << image_name << ':' << frame_number << ' '
00735              << segment_number << ' '
00736              << y << ' ' << x << ' ' << G.getVal(x, y) << '\n' ;
00737 }
00738 
00739 } // end of local namespace encapsulating SIFT descriptors accumulation section
00740 
00741 //-------------------------- OPENCV MATRICES ----------------------------
00742 
00743 namespace {
00744 
00745 // Crude encapsulation of OpenCV matrices
00746 class OpenCVMatrix {
00747    CvMat* matrix ;
00748 public :
00749    OpenCVMatrix(int num_rows, int num_cols, int type) ;
00750    OpenCVMatrix(CvMat*) ;
00751    ~OpenCVMatrix() ;
00752 
00753    int num_rows() const {return matrix->rows ;}
00754    int num_cols() const {return matrix->cols ;}
00755    int type()     const {return CV_MAT_TYPE(matrix->type) ;}
00756 
00757    template<typename T> // T must match matrix->type (float for CV_32FC1, etc.)
00758    T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;}
00759 
00760    operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea)
00761 } ;
00762 
00763 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type)
00764    : matrix(cvCreateMat(num_rows, num_cols, type))
00765 {
00766    if (! matrix)
00767       throw std::runtime_error("unable to create OpenCV matrix") ;
00768 }
00769 
00770 OpenCVMatrix::OpenCVMatrix(CvMat* M)
00771    : matrix(M)
00772 {
00773    if (! matrix)
00774       throw std::runtime_error("cannot create empty/null matrix") ;
00775 }
00776 
00777 OpenCVMatrix::~OpenCVMatrix()
00778 {
00779    cvReleaseMat(& matrix) ;
00780 }
00781 
00782 } // end of local namespace encapsulating OpenCV matrices section
00783 
00784 //-------------------- SIFT VOCABULARY COMPUTATION ----------------------
00785 
00786 // This section contains the code for the K-means clustering of the SIFT
00787 // descriptors of the training images (i.e., training phase two).
00788 namespace {
00789 
00790 // Useful types
00791 typedef Image<float> Vocabulary ;
00792 
00793 // Forward declarations
00794 int    count_lines(const std::string& file_name) ;
00795 CvMat* load_sift_descriptors(const std::string& file_name, int num_lines) ;
00796 CvMat* kmeans(int K, const OpenCVMatrix& data) ;
00797 void   save_vocabulary(const OpenCVMatrix&, const std::string& file_name) ;
00798 
00799 // The following method implements the "vocab" action of this program
00800 // for clustering the SIFT descriptors of the training images to obtain
00801 // the 200 "prototypical" SIFT descriptors that form the basis of the
00802 // gist vector computation in terms of these "words" or "vis-terms".
00803 void BBoFSimulation::compute_sift_vocabulary()
00804 {
00805    LINFO("MVN: counting lines in %s", sift_descriptors_file().c_str()) ;
00806    int num_rows = count_lines(sift_descriptors_file()) ;
00807 
00808    LINFO("MVN: reading %d SIFT descriptors from %s",
00809          num_rows, sift_descriptors_file().c_str()) ;
00810    OpenCVMatrix sift_descriptors =
00811       load_sift_descriptors(sift_descriptors_file(), num_rows) ;
00812 
00813    //GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ;
00814    //const int K = GistEstimatorBeyondBoF::num_channels() ;
00815    const int K = vocabulary_size() ;
00816    LINFO("MVN: doing K-means on SIFT descriptors to get %d clusters", K) ;
00817    OpenCVMatrix vocabulary = kmeans(K, sift_descriptors) ;
00818 
00819    LINFO("MVN: K-means done; saving SIFT vocabulary to %s",
00820          sift_vocabulary_file().c_str()) ;
00821    save_vocabulary(vocabulary, sift_vocabulary_file()) ;
00822 }
00823 
00824 // The following function reads the SIFT descriptors for the training
00825 // images into an OpenCV matrix. It must know how many lines the SIFT
00826 // descriptors file has. This quantity is the number of rows in resulting
00827 // matrix. The number of columns is simply the size of each SIFT
00828 // descriptor (usually: 128 values make up a SIFT descriptor).
00829 CvMat* load_sift_descriptors(const std::string& file_name, int num_rows)
00830 {
00831    int num_cols = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ;
00832    CvMat* M = cvCreateMat(num_rows, num_cols, CV_32FC1) ;
00833 
00834    double d ; std::string dummy ; // for ignoring first four fields
00835    std::ifstream ifs(file_name.c_str()) ;
00836    for (int i = 0; i < num_rows; ++i)
00837    {
00838       std::string str ;
00839       std::getline(ifs, str) ;
00840       if (! ifs || str.empty()) {
00841         if (i == num_rows - 1) // okay; read all rows
00842           break ;
00843         else { // descriptors file missing data or some other error
00844           cvReleaseMat(& M) ;
00845           throw std::runtime_error(file_name +
00846                                    ": missing SIFT descriptors or other read error") ;
00847         }
00848       }
00849       std::istringstream line(str) ;
00850       line >> dummy >> dummy >> dummy >> dummy ;
00851 
00852       for (int j = 0; j < num_cols; ++j) {
00853          if (! line) {
00854             cvReleaseMat(& M) ;
00855             throw std::runtime_error(file_name +
00856                ": missing SIFT descriptor values on line " + to_string(i)) ;
00857          }
00858          line >> d ;
00859          cvmSet(M, i, j, d) ;
00860       }
00861    }
00862 
00863    return M ;
00864 }
00865 
// Termination parameters for K-means: the maximum number of iterations
// and the precision passed to the clustering routine. Either one may be
// overridden by defining it before this point (e.g., on the compiler
// command line).
#if !defined(BBOF_KMEANS_ITERATIONS)
   #define BBOF_KMEANS_ITERATIONS (100)
#endif
#if !defined(BBOF_KMEANS_PRECISION)
   #define BBOF_KMEANS_PRECISION (.01)
#endif
00873 
00874 // Forward declaration
00875 CvMat* compute_centroids(int K, const OpenCVMatrix& data,
00876                          const OpenCVMatrix& cluster_assignments) ;
00877 
00878 // This function performs K-means clustering on the supplied data matrix
00879 // and returns the cluster centers.
00880 CvMat* kmeans(int K, const OpenCVMatrix& data)
00881 {
00882    OpenCVMatrix cluster_assignments(data.num_rows(), 1, CV_32SC1) ;
00883 
00884    LINFO("MVN: computing K-means cluster assignments with OpenCV") ;
00885    cvKMeans2(data, K, cluster_assignments,
00886              cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER,
00887                             BBOF_KMEANS_ITERATIONS, BBOF_KMEANS_PRECISION)) ;
00888 
00889    LINFO("MVN: cluster assignments done; computing centroids...") ;
00890    return compute_centroids(K, data, cluster_assignments) ;
00891 }
00892 
00893 // OpenCV's K-means implementation returns cluster assignments. But we
00894 // need the cluster centroids. This function takes the data matrix and
00895 // cluster assignments and returns the K centroids.
00896 CvMat* compute_centroids(int K, const OpenCVMatrix& data,
00897                          const OpenCVMatrix& cluster_assignments)
00898 {
00899    CvMat* centroids = cvCreateMat(K, data.num_cols(), data.type()) ;
00900    cvZero(centroids) ;
00901 
00902    std::vector<int> cluster_counts(K) ;
00903    std::fill(cluster_counts.begin(), cluster_counts.end(), 0) ;
00904 
00905    for (int i = 0; i < data.num_rows(); ++i)
00906    {
00907       int C = cluster_assignments.get<int>(i, 0) ;
00908       ++cluster_counts[C] ;
00909 
00910       // Compute sum of C-th centroid and i-th row
00911       for (int j = 0; j < data.num_cols(); ++j)
00912          cvmSet(centroids, C, j,
00913                 cvmGet(centroids, C, j) + data.get<float>(i, j)) ;
00914    }
00915 
00916    // Compute the K centroids by averaging the totals accumulated in the
00917    // centroids matrix using the cluster counts.
00918    for (int C = 0; C < K; ++C)
00919       for (int j = 0; j < data.num_cols(); ++j)
00920          cvmSet(centroids, C, j,
00921                 cvmGet(centroids, C, j) / cluster_counts[C]) ;
00922 
00923    return centroids ;
00924 }
00925 
00926 // Write the SIFT vocabulary, row by row, to a plain text file.
00927 void save_vocabulary(const OpenCVMatrix& vocabulary,
00928                      const std::string& file_name)
00929 {
00930    std::ofstream ofs(file_name.c_str()) ;
00931    for (int i = 0; i < vocabulary.num_rows(); ++i) {
00932       for (int j = 0; j < vocabulary.num_cols(); ++j)
00933          ofs << vocabulary.get<float>(i, j) << ' ' ;
00934       ofs << '\n' ;
00935    }
00936 }
00937 
00938 // Read the SIFT vocabulary from a plain text file into an Image<T>
00939 Vocabulary load_vocabulary(const std::string& file_name)
00940 {
00941    const int M = count_lines(file_name) ;
00942    const int N = GistEstimatorBeyondBoF::SiftDescriptor::SIZE ;
00943    Vocabulary V(N, M, ZEROS) ;
00944 
00945    float f ;
00946    std::ifstream ifs(file_name.c_str()) ;
00947    for (int j = 0; j < M; ++j)
00948       for (int i = 0; i < N; ++i) {
00949          if (! ifs)
00950             throw std::runtime_error(file_name + ": out of data?!?") ;
00951          ifs >> f ;
00952          V.setVal(i, j, f) ;
00953       }
00954 
00955    return V ;
00956 }
00957 
00958 } // end of local namespace encapsulating SIFT vocabulary computation section
00959 
00960 //------------------- TRAINING HISTOGRAM PROCESSING ---------------------
00961 
00962 // Training is a two step process: first, we use K-means to cluster the
00963 // training set's SIFT descriptors to create the vocabulary of
00964 // "prototypical" SIFT descriptors. Then, we collect the histograms
00965 // counting these "vis-terms" in the training images. The vocabulary of
00966 // prototypical SIFT descriptors (or vis-terms) and the training set's
00967 // histogram "database" are both used for image classification.
00968 namespace {
00969 
00970 // Some useful types for dealing with vis-term histograms
00971 typedef Image<double> Histogram ;
00972 
00973 // Forward declarations
00974 void save_histogram(const Histogram& histogram, const std::string& file_name,
00975                     const std::string& image_name, int frame_number,
00976                     const std::string& segment_number) ;
00977 
00978 // This method implements the "hist" action of this program. Like the
00979 // accumulate action, it implements a "main loop" for the simulation,
00980 // evolving different components with each iteration. But rather than
00981 // dipping into the GistEstimatorBeyondBoF's processing pipeline, it
00982 // loads the SIFT vocabulary and then uses GistEstimatorBeyondBoF to
00983 // obtain the flattened out multi-level histogram for each of the
00984 // training images. These histograms are saved to the training
00985 // histograms database specified by the --histograms-file option.
00986 void BBoFSimulation::compute_training_histograms()
00987 {
00988    ModelManagerStarter M(model_manager) ;
00989 
00990    nub::soft_ref<GistEstimatorBeyondBoF> ge =
00991       dynCastWeak<GistEstimatorBeyondBoF>(
00992          model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
00993    if (ge.isInvalid())
00994       throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
00995 
00996    Vocabulary V = load_vocabulary(sift_vocabulary_file()) ;
00997    ge->setVocabulary(V) ;
00998    LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s",
00999          V.getHeight(), sift_vocabulary_file().c_str()) ;
01000 
01001    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
01002    for(;;)
01003    {
01004       try
01005       {
01006          SeC<SimEventGistOutput> gist_out =
01007             event_queue->check<SimEventGistOutput>(brain.get(),
01008                                                    SEQ_UNMARKED | SEQ_MARK,
01009                                                    ge.get()) ;
01010          if (gist_out) // BBoF GE has a gist vector waiting to be picked up
01011             save_histogram(ge->getGist(), histograms_file(),
01012                            image_name(), input_frame_series->frame(),
01013                            segment_number()) ;
01014          if (event_queue->evolve() != SIM_CONTINUE)
01015             break ;
01016       }
01017       catch (lfatal_exception&) // if we seek beyond end of frame series
01018       {
01019          return ; // prevent LFATAL induced abortion
01020       }
01021    }
01022 }
01023 
01024 // This function appends a training image's histogram to the training
01025 // histograms database file under the supplied "entry" name. As we did in
01026 // the SIFT descriptors accumulation function, in order to minimize
01027 // possible inconsistencies in this database, we choose to open and close
01028 // the training histograms file with each invocation of this helper
01029 // rather than keep a persistent ostream object around that obviates the
01030 // need for repeated file open/close operations.
01031 void save_histogram(const Histogram& histogram, const std::string& file_name,
01032                     const std::string& image_name, int frame_number,
01033                     const std::string& segment_number)
01034 {
01035    std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ;
01036    ofs << image_name << ':' << frame_number << ' '
01037        << segment_number << ' ' ;
01038    for (int y = 0; y < histogram.getHeight(); ++y) // should be just one row
01039       for (int x = 0; x < histogram.getWidth(); ++x) // should be 4200 columns
01040          ofs << histogram.getVal(x, y) << ' ' ;
01041    ofs << '\n' ;
01042 }
01043 
01044 } // end of local namespace encapsulating training histograms section
01045 
01046 //--------------------- SVM CLASSIFIER GENERATION -----------------------
01047 
01048 namespace {
01049 
01050 // Forward declarations
01051 void create_torch_dataset(const std::string&, const std::string&,
01052                           const std::string&) ;
01053 Torch::SVMClassification* create_torch_classifier(const std::string&) ;
01054 std::string temp_file_name() ;
01055 
01056 // The following method implements this program's "svm" action.
01057 void BBoFSimulation::generate_svm_classifier()
01058 {
01059    GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ;
01060 
01061    create_torch_dataset(histograms_file(), segment_number(), svm_temp_file()) ;
01062    Torch::SVMClassification* svm = create_torch_classifier(svm_temp_file()) ;
01063    svm->save(svm_classifier_file().c_str()) ;
01064 
01065    delete svm ;
01066    unlink(svm_temp_file().c_str()) ;
01067 }
01068 
01069 // Quick helper for reading and writing gist vectors from/to a file
01070 struct GistVector {
01071    std::vector<double> values ;
01072    GistVector() ;
01073 } ;
01074 
01075 GistVector::GistVector()
01076    : values(GistEstimatorBeyondBoF::gist_vector_size())
01077 {}
01078 
01079 std::istream& operator>>(std::istream& is, GistVector& g)
01080 {
01081    for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i)
01082       if (is)
01083          is >> g.values[i] ;
01084       else
01085          throw std::runtime_error("missing gist vector data") ;
01086    return is ;
01087 }
01088 
01089 std::ostream& operator<<(std::ostream& os, const GistVector& g)
01090 {
01091    for (int i = 0; i < GistEstimatorBeyondBoF::gist_vector_size(); ++i)
01092       os << g.values[i] << ' ' ;
01093    return os ;
01094 }
01095 
01096 // The torch library needs its datasets in a particular format.
01097 // Unfortunately, this program works with some other format. The
01098 // following function reads the histograms file saved by an earlier run
01099 // of this program and creates a corresponding torch dataset for
01100 // subsequent training of an SVM classifier for the specified target
01101 // segment.
01102 void create_torch_dataset(const std::string& hist_file,
01103                           const std::string& target,
01104                           const std::string& torch_dataset)
01105 {
01106    const int n = count_lines(hist_file) ;
01107 
01108    std::ifstream in(hist_file.c_str()) ;
01109    std::ofstream out(torch_dataset.c_str()) ;
01110 
01111    std::string dummy, segment, str ;
01112    GistVector gist_vector ;
01113    out << n << ' ' << (GistEstimatorBeyondBoF::gist_vector_size() + 1) << '\n';
01114    for (int i = 0; i < n; ++i)
01115    {
01116       std::getline(in, str) ;
01117       if (! in || str.empty()) {
01118         if (i == n - 1) // okay; all training histograms read successfully
01119           break ;
01120         else {
01121           out.close() ;
01122           unlink(torch_dataset.c_str()) ;
01123           throw std::runtime_error(hist_file +
01124                                    ": missing data or other read error") ;
01125         }
01126       }
01127       std::istringstream line(str) ;
01128       line >> dummy >> segment >> gist_vector ;
01129       out << gist_vector << ' ' << ((segment == target) ? +1 : -1) << '\n' ;
01130    }
01131 }
01132 
01133 // The histogram intersection kernel for matching gist vectors of
01134 // different images.
01135 class HistIntKernel : public Torch::Kernel {
01136   real eval(Torch::Sequence*, Torch::Sequence*) ;
01137 } ;
01138 
01139 real HistIntKernel::eval(Torch::Sequence* a, Torch::Sequence* b)
01140 {
01141    real sum = 0 ;
01142    for (int i = 0; i < a->frame_size; ++i)
01143       sum += min(a->frames[0][i], b->frames[0][i]) ;
01144    return sum ;
01145 }
01146 
01147 // The following function creates an SVM classifier using the histogram
01148 // intersection kernel defined above.
01149 Torch::SVMClassification* create_torch_classifier(const std::string& dataset)
01150 {
01151    HistIntKernel kernel ;
01152    Torch::SVMClassification* svm = new Torch::SVMClassification(& kernel) ;
01153    Torch::QCTrainer trainer(svm) ;
01154    Torch::MatDataSet data(dataset.c_str(),
01155                           GistEstimatorBeyondBoF::gist_vector_size(), 1) ;
01156    trainer.train(& data, 0) ;
01157    return svm ;
01158 }
01159 
01160 } // end of local namespace encapsulating SVM classifier generation section
01161 
01162 //----------------------- IMAGE CLASSIFICATION --------------------------
01163 
01164 namespace {
01165 
01166 // Useful typedefs
01167 typedef std::vector<Torch::SVMClassification*> Classifiers ;
01168 
01169 // Forward declarations
01170 Classifiers load_classifiers(std::string, HistIntKernel*) ;
01171 Histogram read_gist_vector(std::istream&) ;
01172 void classify_image(const Histogram&, const Classifiers&,
01173                     const std::string&, int, const std::string&,
01174                     const std::string&) ;
01175 void nuke_classifiers(Classifiers&) ;
01176 
01177 // The following method implements this program's "classify" action. It
01178 // reads the SIFT descriptors vocabulary and computes gist vectors for
01179 // input images using the BBoF gist estimator. Then, it uses the SVM
01180 // classifiers generated by the "svm" action to decide which category the
01181 // input image belongs to.
01182 void BBoFSimulation::classify_input_images()
01183 {
01184    ModelManagerStarter M(model_manager) ;
01185 
01186    nub::soft_ref<GistEstimatorBeyondBoF> ge =
01187       dynCastWeak<GistEstimatorBeyondBoF>(
01188          model_manager.subComponent("GistEstimatorBeyondBoF", MC_RECURSE)) ;
01189    if (ge.isInvalid())
01190       throw std::runtime_error("can only use GistEstimatorBeyondBoF") ;
01191 
01192    Vocabulary V = load_vocabulary(sift_vocabulary_file()) ;
01193    ge->setVocabulary(V) ;
01194    LINFO("MVN: loaded SIFT vocabulary of %d vis-terms from %s",
01195          V.getHeight(), sift_vocabulary_file().c_str()) ;
01196 
01197    HistIntKernel kernel ;
01198    Classifiers svm_classifiers =
01199       load_classifiers(svm_classifier_file(), & kernel) ;
01200 
01201    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
01202    for(;;)
01203    {
01204       try
01205       {
01206          SeC<SimEventGistOutput> gist_out =
01207             event_queue->check<SimEventGistOutput>(brain.get(),
01208                                                    SEQ_UNMARKED | SEQ_MARK,
01209                                                    ge.get()) ;
01210          if (gist_out) // BBoF GE has a gist vector waiting to be picked up
01211             classify_image(ge->getGist(), svm_classifiers,
01212                            image_name(), input_frame_series->frame(),
01213                            segment_number(), results_file()) ;
01214          if (event_queue->evolve() != SIM_CONTINUE)
01215             break ;
01216       }
01217       catch (lfatal_exception&) // if we seek beyond end of frame series
01218       {
01219          nuke_classifiers(svm_classifiers) ;
01220          return ; // prevent LFATAL induced abortion
01221       }
01222    }
01223 
01224    nuke_classifiers(svm_classifiers) ;
01225 }
01226 
01227 // The following method implements this program's "classify_gv" action.
01228 // It reads the SIFT descriptors vocabulary and loads the SVM
01229 // classifiers. Then, it uses the classifiers and the precomputed gist
01230 // vectors to decide to which category the input images belong.
01231 void BBoFSimulation::classify_using_gist_vectors()
01232 {
01233    GistEstimatorBeyondBoF::num_channels(vocabulary_size()) ;
01234 
01235    HistIntKernel kernel ;
01236    Classifiers svm_classifiers =
01237       load_classifiers(svm_classifier_file(), & kernel) ;
01238 
01239    int line_number = 1 ;
01240    std::ifstream ifs(gist_vectors_file().c_str()) ;
01241    while (ifs)
01242       try
01243       {
01244          classify_image(read_gist_vector(ifs), svm_classifiers,
01245                         image_name(), line_number++,
01246                         segment_number(), results_file()) ;
01247       }
01248       catch (std::exception&) // ifs ran out of gist vector data
01249       {
01250       }
01251 
01252    nuke_classifiers(svm_classifiers) ;
01253 }
01254 
01255 // Given an input image's gist vector and the SVM classifiers for all the
01256 // categories, this function checks which categories the input image
01257 // belongs to and writes the results to the classification results file.
01258 void classify_image(const Histogram&   gist_vector,
01259                     const Classifiers& classifiers,
01260                     const std::string& image_name, int frame_number,
01261                     const std::string& ground_truth,
01262                     const std::string& results_file)
01263 {
01264    std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ;
01265    ofs << image_name << ':' << frame_number << ' ' << ground_truth << ' ' ;
01266 
01267    Torch::Sequence gv(1, GistEstimatorBeyondBoF::gist_vector_size()) ;
01268    std::copy(gist_vector.begin(), gist_vector.end(), gv.frames[0]) ;
01269 
01270    int n = 0 ; // num categories into which input image can be classified
01271    const int N = classifiers.size() ;
01272    for (int i = 0; i < N; ++i) {
01273       classifiers[i]->forward(& gv) ;
01274       if (classifiers[i]->outputs->frames[0][0] > 0) {
01275          ofs << (i+1) << ' ' ;
01276          ++n ;
01277       }
01278    }
01279 
01280    if (! n) // input image could not be classified into any category
01281       ofs << '0' ;
01282    ofs << '\n' ;
01283 }
01284 
01285 // This function loads all the SVM classifiers beginning with the
01286 // specified "root" name. Here's how this is supposed to work:
01287 //
01288 // Let's say we 9 categories. Earlier runs of this program ought to have
01289 // created 9 SVM classifiers. Usually, these would be named
01290 // "XXX_svm_classifier.1", "XXX_svm_classifier.2", "XXX_svm_classifier.3"
01291 // and so on. This function will read each of these files back into
01292 // memory using the torch library (which is what created those files in
01293 // the first place).
01294 //
01295 // The kernel for each of these SVM classifiers is the histogram
01296 // intersection kernel as described in the Lazebnik paper.
01297 Classifiers
01298 load_classifiers(std::string classifiers_root_name, HistIntKernel* kernel)
01299 {
01300    classifiers_root_name += ".*" ;
01301    glob_t buf ;
01302    if (glob(classifiers_root_name.c_str(), 0, 0, & buf) != 0)
01303       throw std::runtime_error("couldn't find/load the SVM classifiers") ;
01304 
01305    const int N = buf.gl_pathc ;
01306    Classifiers classifiers(N) ;
01307    for (int i = 0; i < N; ++i) {
01308       classifiers[i] = new Torch::SVMClassification(kernel) ;
01309       classifiers[i]->load(buf.gl_pathv[i]) ;
01310    }
01311 
01312    globfree(& buf) ;
01313    return classifiers ;
01314 }
01315 
01316 // Delete all SVM classifier objects created in previous function
01317 void nuke_classifiers(Classifiers& C)
01318 {
01319    const int N = C.size() ;
01320    for (int i = 0; i < N; ++i)
01321       delete C[i] ;
01322 }
01323 
01324 // This function reads a gist vector from the specified input stream
01325 Histogram read_gist_vector(std::istream& is)
01326 {
01327    GistVector G ;
01328    is >> G ;
01329 
01330    Histogram H(GistEstimatorBeyondBoF::gist_vector_size(), 1, NO_INIT) ;
01331    std::copy(G.values.begin(), G.values.end(), H.beginw()) ;
01332    return H ;
01333 }
01334 
01335 } // end of local namespace encapsulating image classification section
01336 
01337 //-------------------------- UTILITY ROUTINES ---------------------------
01338 
01339 namespace {
01340 
// Count the number of lines in a file (wc -l). A final line that is not
// terminated by a newline is counted as well; an empty file has zero
// lines.
//
// Throws std::runtime_error when the file cannot be opened. Callers use
// the result as a size (e.g., the number of matrix rows), so reporting
// the failure here beats handing them a meaningless count.
int count_lines(const std::string& file_name)
{
   std::ifstream ifs(file_name.c_str()) ;
   if (! ifs)
      throw std::runtime_error(file_name + ": unable to open file") ;

   int n = 0 ;
   std::string line ;
   while (std::getline(ifs, line)) // counts successful reads only
      ++n ;
   return n ;
}
01354 
// Returns true if a floating point number is near zero, i.e., if its
// magnitude does not exceed the machine epsilon for doubles
bool is_zero(double d)
{
   const double tolerance = std::numeric_limits<double>::epsilon() ;
   return (-tolerance <= d) && (d <= tolerance) ;
}
01360 
01361 } // end of local namespace encapsulating utility routines section
01362 
01363 //-----------------------------------------------------------------------
01364 
01365 #endif // #if !defined(HAVE_OPENCV) || !defined(INVT_HAVE_TORCH)
01366 
01367 /* So things look consistent in everyone's emacs... */
01368 /* Local Variables: */
01369 /* indent-tabs-mode: nil */
01370 /* End: */