train-gecb.C

Go to the documentation of this file.
00001 /*!
00002    \file Gist/train-gecb.C
00003    \brief Train and classify images using the context-based gist
00004    estimator.
00005 */
00006 
00007 // //////////////////////////////////////////////////////////////////// //
00008 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00009 // University of Southern California (USC) and the iLab at USC.         //
00010 // See http://iLab.usc.edu for information about this project.          //
00011 // //////////////////////////////////////////////////////////////////// //
00012 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00013 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00014 // in Visual Environments, and Applications'' by Christof Koch and      //
00015 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00016 // pending; application number 09/912,225 filed July 23, 2001; see      //
00017 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00018 // //////////////////////////////////////////////////////////////////// //
00019 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00020 //                                                                      //
00021 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00022 // redistribute it and/or modify it under the terms of the GNU General  //
00023 // Public License as published by the Free Software Foundation; either  //
00024 // version 2 of the License, or (at your option) any later version.     //
00025 //                                                                      //
00026 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00027 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00028 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00029 // PURPOSE.  See the GNU General Public License for more details.       //
00030 //                                                                      //
00031 // You should have received a copy of the GNU General Public License    //
00032 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00033 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00034 // Boston, MA 02111-1307 USA.                                           //
00035 // //////////////////////////////////////////////////////////////////// //
00036 //
00037 // Primary maintainer for this file: Manu Viswanathan <mviswana at usc dot edu>
00038 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-gecb.C $
00039 // $Id: train-gecb.C 14755 2011-04-29 05:55:18Z itti $
00040 //
00041 
00042 //------------------------------ HEADERS --------------------------------
00043 
00044 #include "Image/OpenCVUtil.H"  // must be first to avoid conflicting defs of int64, uint64
00045 
00046 // Gist specific headers
00047 #include "Neuro/GistEstimatorContextBased.H"
00048 
00049 // Other INVT headers
00050 #include "Neuro/StdBrain.H"
00051 #include "Neuro/NeuroOpts.H"
00052 #include "Neuro/NeuroSimEvents.H"
00053 
00054 #include "Media/SimFrameSeries.H"
00055 #include "Media/MediaOpts.H"
00056 
00057 #include "Simulation/SimEventQueue.H"
00058 #include "Simulation/SimEventQueueConfigurator.H"
00059 
00060 #include "Channels/ChannelOpts.H"
00061 #include "Component/ModelManager.H"
00062 #include "Component/ModelOptionDef.H"
00063 
00064 #include "Image/MathOps.H"
00065 #include "Image/MatrixOps.H"
00066 #include "Image/Point2D.H"
00067 
00068 #include "nub/ref.h"
00069 
#ifndef HAVE_OPENCV // fake OpenCV API so as to not break builds

// Stand-ins for the handful of OpenCV names this file uses, compiled
// only when HAVE_OPENCV is not defined. None of them does any work:
// the functions are empty or return zero, so cvCreateMat() always
// yields a null matrix pointer.
namespace {

// Bare-bones replacement for OpenCV's matrix header
struct CvMat {
   int rows ;
   int cols ;
   int type ;
} ;

// Matrix creation and destruction
inline CvMat* cvCreateMat(int /*rows*/, int /*cols*/, int /*type*/)
{
   return 0 ;
}
inline void cvReleaseMat(CvMat** /*matrix*/)
{
}

// Element access
inline double cvmGet(CvMat* /*matrix*/, int /*row*/, int /*col*/)
{
   return 0 ;
}
inline void cvmSet(CvMat* /*matrix*/, int /*row*/, int /*col*/,
                   double /*value*/)
{
}

// Matrix operations
inline void cvTranspose(const CvMat* /*src*/, CvMat* /*dst*/)
{
}

// Matrix element types
#define CV_32FC1 0
inline int CV_MAT_TYPE(int /*flags*/)
{
   return 0 ;
}
#define CV_MAT_ELEM(matrix, type, row, col) (type(0))

// PCA
#define CV_PCA_DATA_AS_COL 0
inline void cvCalcPCA(const CvMat* /*data*/, CvMat* /*mean*/,
                      CvMat* /*eigenvalues*/, CvMat* /*eigenvectors*/,
                      int /*flags*/)
{
}

} // end of local namespace holding the fake OpenCV API

#endif // OpenCV availability check
00092 
00093 // Standard C++ headers
00094 #include <fstream>
00095 #include <sstream>
00096 #include <ios>
00097 #include <numeric>
00098 #include <algorithm>
00099 #include <functional>
00100 #include <map>
00101 #include <vector>
00102 #include <iterator>
00103 #include <stdexcept>
00104 #include <utility>
00105 
00106 //------------------------------ DEFINES --------------------------------
00107 
// The gist vectors produced by the estimator ("raw" gist vectors) have
// 384 entries each. PCA is used to shrink them so that they are cheaper
// to compare, etc.; this is the number of principal components kept.
// It can be overridden at compile time.
#if !defined(GECB_NUM_PRINCIPAL_COMPONENTS)
   #define GECB_NUM_PRINCIPAL_COMPONENTS 80
#endif
00114 
00115 //----------------------- FORWARD DECLARATIONS --------------------------
00116 
00117 namespace {
00118 
00119 // Some useful types for dealing with gist training vectors
00120 typedef Image<double> GistVector ;
00121 typedef std::map<int, GistVector> TrainingDB ;
00122 typedef TrainingDB::value_type TrainingDBEntry ;
00123 
00124 // PCA
00125 class OpenCVMatrix ;
00126 CvMat* load_training_vectors(const std::string& file_name, int M, int N) ;
00127 CvMat* pca(const OpenCVMatrix& data, int num_principal_components) ;
00128 
00129 // Image classification
00130 typedef std::pair<std::string, GistVector> InputImageData ;
00131 void classify_image(const InputImageData&, const TrainingDB&,
00132                     const std::string& results_file) ;
00133 
00134 // I/O
00135 void save(const OpenCVMatrix&, const std::string& file_name) ;
00136 
00137 void append(const Image<double>&,
00138             const std::string& file_name,
00139             const std::string& image_name = std::string()) ;
00140 Image<double> load_image(const std::string& file_name,
00141                          int width, int height) ;
00142 std::ostream& operator<<(std::ostream&, const Image<double>&) ;
00143 std::istream& operator>>(std::istream&, Image<double>&) ;
00144 
00145 TrainingDB load_training_database(const std::string& file_name) ;
00146 
00147 std::string getline(std::istream&) ;
00148 
00149 // Utilities
00150 int count_lines(const std::string& file_name) ;
00151 template<typename T> std::string to_string(const T&) ;
00152 
00153 }
00154 
00155 //-------------------------- OPENCV MATRICES ----------------------------
00156 
00157 // Crude encapsulation of OpenCV matrices
00158 namespace {
00159 
00160 class OpenCVMatrix {
00161    CvMat* matrix ;
00162 public :
00163    OpenCVMatrix(int num_rows, int num_cols, int type) ;
00164    OpenCVMatrix(CvMat*) ;
00165    ~OpenCVMatrix() ;
00166 
00167    int num_rows() const {return matrix->rows ;}
00168    int num_cols() const {return matrix->cols ;}
00169    int type()     const {return CV_MAT_TYPE(matrix->type) ;}
00170 
00171    template<typename T> // T must match matrix->type (float for CV_32FC1, etc.)
00172    T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;}
00173 
00174    operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea)
00175 } ;
00176 
00177 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type)
00178    : matrix(cvCreateMat(num_rows, num_cols, type))
00179 {
00180    if (! matrix)
00181       throw std::runtime_error("unable to create OpenCV matrix") ;
00182 }
00183 
00184 OpenCVMatrix::OpenCVMatrix(CvMat* M)
00185    : matrix(M)
00186 {
00187    if (! matrix)
00188       throw std::runtime_error("cannot create empty/null matrix") ;
00189 }
00190 
00191 OpenCVMatrix::~OpenCVMatrix()
00192 {
00193    cvReleaseMat(& matrix) ;
00194 }
00195 
00196 } // end of local namespace encapsulating above helper
00197 
00198 //----------------------- COMMAND LINE OPTIONS --------------------------
00199 
00200 // This program has five distinct phases/modes of operation, each one
00201 // specified via a suitable non-option command line argument.
00202 // Additionally, it supports several command line options to allow users
00203 // to tweak various parameters such as the name of the PCA matrix file,
00204 // training database, and so on.
00205 namespace {
00206 
00207 const ModelOptionCateg MOC_GECB = {
00208    MOC_SORTPRI_3,
00209    "Options specific to the context-based gist program",
00210 } ;
00211 
00212 // In the training vectors accumulation phase, we collect all the gist
00213 // vectors that will be used as the input to the PCA into a plain text
00214 // file.
00215 #ifndef GECB_DEFAULT_TRAINING_VECTORS_FILE
00216    #define GECB_DEFAULT_TRAINING_VECTORS_FILE "gecb_training_vectors.txt"
00217 #endif
00218 
00219 const ModelOptionDef OPT_TrainingVectors = {
00220    MODOPT_ARG_STRING, "TrainingVectors", & MOC_GECB, OPTEXP_CORE,
00221    "This option specifies the name of the file where the training vectors\n"
00222    "should be accumulated or read from. This is a plain text file containing\n"
00223    "the training vectors matrix that will be fed into the PCA procedure.\n"
00224    "Each line of this file will contain a row of \"raw\" (i.e., 384-\n"
00225    "dimensional) gist vectors. For PCA, these rows will be read in as the\n"
00226    "columns of the data matrix.\n",
00227    "training-vectors", '\0', "training-vectors-file",
00228    GECB_DEFAULT_TRAINING_VECTORS_FILE,
00229 } ;
00230 
00231 // For PCA, we use the accumulated training vectors, i.e., the
00232 // 384-dimensional "raw" gist vectors, and produce the transformation
00233 // matrix that will convert these raw gist vectors into 80-dimensional
00234 // vectors. This transformation matrix is stored in the following file.
00235 #ifndef GECB_DEFAULT_PCA_MATRIX_FILE
00236    #define GECB_DEFAULT_PCA_MATRIX_FILE "gecb_pca_matrix.txt"
00237 #endif
00238 
00239 const ModelOptionDef OPT_PCAMatrix = {
00240    MODOPT_ARG_STRING, "PCAMatrix", & MOC_GECB, OPTEXP_CORE,
00241    "This option specifies the name of the file in which the 384x80 PCA\n"
00242    "transformation matrix is (or will be) stored. \"Raw\" 384-dimensional\n"
00243    " gist vectors can be reduced to 80 dimensions by muliplying with this\n"
00244    "matrix. The PCA transformation matrix is stored in a plain text file\n"
00245    "with each line containing one row of the matrix.\n",
00246    "pca-matrix", '\0', "pca-matrix-file",
00247    GECB_DEFAULT_PCA_MATRIX_FILE,
00248 } ;
00249 
00250 // In the second phase of training, we use the PCA transformation matrix
00251 // to output the 80-dimensional gist vectors of the training images and
00252 // store them in a training set under the specified "entry name" and
00253 // segment number.
00254 //
00255 // DEVNOTE: The default values for these options are not very useful.
00256 // They really ought to be specified on the command line.
00257 #ifndef GECB_DEFAULT_IMAGE_NAME
00258    #define GECB_DEFAULT_IMAGE_NAME "some_image"
00259 #endif
00260 #ifndef GECB_DEFAULT_SEGMENT_NUMBER
00261    #define GECB_DEFAULT_SEGMENT_NUMBER "0"
00262 #endif
00263 
00264 const ModelOptionDef OPT_ImageName = {
00265    MODOPT_ARG_STRING, "ImageName", & MOC_GECB, OPTEXP_CORE,
00266    "This option specifies the \"root\" name of an entry in the training\n"
00267    "set or the results file. The image number will be automatically\n"
00268    "appended to this \"root\" name. The training database is a plain text\n"
00269    "file containing one entry per line. The first field specifies the name\n"
00270    "plus number of the entry (e.g., foo.1, foo.2, bar.1, and so on). The\n"
00271    "second field is the segment number for this image. The remaining fields\n"
00272    "are the 80 numbers making up the image's gist vector.\n\n"
00273    "In classification mode, this option specifies the name of the input\n"
00274    "image's gist vector that is written to the results file.\n",
00275    "image-name", '\0', "image-name-root",
00276    GECB_DEFAULT_IMAGE_NAME,
00277 } ;
00278 
00279 const ModelOptionDef OPT_SegmentNumber = {
00280    MODOPT_ARG_STRING, "SegmentNumber", & MOC_GECB, OPTEXP_CORE,
00281    "This option specifies the segment number for an image in the training\n"
00282    "set. The segment number is used in the third phase of training to\n"
00283    "compute the mean 80-D gist vectors for each segment and during\n"
00284    "classification to ascertain the segment number for each input image.\n",
00285    "segment-number", '\0', "image-segment-number",
00286    GECB_DEFAULT_SEGMENT_NUMBER,
00287 } ;
00288 
00289 // The output of the second phase of training, i.e., the 80-D gist
00290 // vectors for each of the training images, is sent to the following
00291 // file.
00292 #ifndef GECB_DEFAULT_TRAINING_SET
00293    #define GECB_DEFAULT_TRAINING_SET "gecb_training_set.txt"
00294 #endif
00295 
00296 const ModelOptionDef OPT_TrainingSet = {
00297    MODOPT_ARG_STRING, "TrainingSet", & MOC_GECB, OPTEXP_CORE,
00298    "This option specifies the name of the training set, a plain text\n"
00299    "file containing one entry per line. The first field specifies the name\n"
00300    "plus number of the entry (e.g., foo.1, foo.2, bar.1, and so on). The\n"
00301    "second field is the segment number for this image. And the remaining\n"
00302    "fields are the 80 numbers that make up the image's gist vector.\n",
00303    "training-set", '\0', "training-set-file",
00304    GECB_DEFAULT_TRAINING_SET,
00305 } ;
00306 
00307 // The training database is a plain text file that specifies the mean
00308 // gist vector for each segment.
00309 #ifndef GECB_DEFAULT_TRAINING_DATABASE
00310    #define GECB_DEFAULT_TRAINING_DATABASE "gecb_training_db.txt"
00311 #endif
00312 
00313 const ModelOptionDef OPT_TrainingDB = {
00314    MODOPT_ARG_STRING, "TrainingDB", & MOC_GECB, OPTEXP_CORE,
00315    "This option specifies the name of the training database, a plain text\n"
00316    "file containing one entry per line. The first field specifies the\n"
00317    "segment number. And the remaining fields are the 80 numbers that make\n"
00318    "up the segment's mean gist vector.\n",
00319    "training-db", '\0', "training-db-file",
00320    GECB_DEFAULT_TRAINING_DATABASE,
00321 } ;
00322 
00323 // In image classification mode, we write the results to the following
00324 // file.
00325 #ifndef GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE
00326    #define GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE "gecb_classifications.txt"
00327 #endif
00328 
00329 const ModelOptionDef OPT_ResultsFile = {
00330    MODOPT_ARG_STRING, "ResultsFile", & MOC_GECB, OPTEXP_CORE,
00331    "This option specifies the name of the classification results file,\n"
00332    "a plain text file containing one result entry per line. The first\n"
00333    "field specifies the name plus number of the input image, (e.g., foo.1,\n"
00334    "foo.2, bar.1, and so on). Then come the numbers of the top five matching\n"
00335    "segments from the training database.\n",
00336    "results-file", '\0', "classification-results-file",
00337    GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE,
00338 } ;
00339 
00340 // The different operational modes of this program must be specified as
00341 // the one and only non-option command line argument. This "action"
00342 // command must be one of the following strings (case-sensitive!):
00343 //
00344 // 1. accumulate -- accumulate the training vectors in the plain text
00345 //    file specified by the --training-vectors option (default is to
00346 //    accumulate the training vectors in ./gecb_training_vectors.txt).
00347 //
00348 // 2. pca -- compute the transformation matrix that will allow us to
00349 //    reduce the 384-dimensional "raw" gist vectors to their 80 principal
00350 //    components using the PCA support in OpenCV.
00351 //
00352 //    The --training-vectors option can be used to specify the input file
00353 //    for the PCA and --pca-matrix option can be used to specify the file
00354 //    in which the transformation matrix should be saved. The defaults
00355 //    are to read from ./gecb_training_vectors.txt and write to
00356 //    ./gecb_pca_matrix.txt.
00357 //
00358 // 3. train -- compute the gist vectors for the training set. The output
00359 //    is sent to the text file specified by the --training-set option.
00360 //    It is a good idea to also supply the --image-name and
00361 //    --segment-number options when saving training gist vectors from an
00362 //    MPEG. A good choice of the entry's name would be the basename of
00363 //    the MPEG file sans extension.
00364 //
00365 // 4. means -- given the 80-dimensional gist vectors for the training
00366 //    images, this action computes the mean gist vectors for each
00367 //    segment. The output is sent to the text file specified by the
00368 //    --training-db option.
00369 //
00370 // 5. classify -- uses the PCA transformation matrix and training
00371 //    database produced by the pca and means commands to classify the
00372 //    input images streaming in.
// Names of the five actions. Each can be overridden at compile time,
// but whatever it is set to must be a string literal because
// GECB_ACTIONS below pastes the names together.
#ifndef GECB_ACCUMULATE_CMD
   #define GECB_ACCUMULATE_CMD "accumulate"
#endif
#ifndef GECB_PCA_CMD
   #define GECB_PCA_CMD "pca"
#endif
#ifndef GECB_TRAIN_CMD
   #define GECB_TRAIN_CMD "train"
#endif
#ifndef GECB_MEANS_CMD
   #define GECB_MEANS_CMD "means"
#endif
#ifndef GECB_CLASSIFY_CMD
   #define GECB_CLASSIFY_CMD "classify"
#endif

// For printing usage info: "{accumulate|pca|train|means|classify}" with
// the default action names.
//
// The whitespace between the string literals and the macro names is
// required: from C++11 onwards, an identifier that directly follows a
// string literal is lexed as a user-defined-literal suffix rather than
// as a separate macro name.
#ifndef GECB_ACTIONS
   #define GECB_ACTIONS ("{" GECB_ACCUMULATE_CMD "|" GECB_PCA_CMD "|" \
                         GECB_TRAIN_CMD "|" GECB_MEANS_CMD "|" \
                         GECB_CLASSIFY_CMD "}")
#endif
00395 
00396 } // end of local namespace encapsulating command line options section
00397 
00398 //--------------------- SIMULATION ENCAPSULATION ------------------------
00399 
00400 namespace {
00401 
00402 // The following helper class wraps around the ModelManager and
00403 // associated objects, providing a neatly encapsulated API for the main
00404 // program.
00405 class ContextBasedSimulation {
00406    ModelManager model_manager ;
00407    nub::soft_ref<SimEventQueueConfigurator> configurator ;
00408    nub::soft_ref<StdBrain> brain ;
00409    nub::ref<SimInputFrameSeries> input_frame_series ;
00410 
00411    // Various command line options specific to this program
00412    OModelParam<std::string> tv_option ; // --training-vectors
00413    OModelParam<std::string> pm_option ; // --pca-matrix
00414    OModelParam<std::string> ts_option ; // --training-set
00415    OModelParam<std::string> td_option ; // --training-db
00416    OModelParam<std::string> rf_option ; // --results-file
00417    OModelParam<std::string> in_option ; // --image-name (not --in!)
00418    OModelParam<std::string> sn_option ; // --segment-number
00419 
00420 public :
00421    ContextBasedSimulation(const std::string& model_name) ;
00422    void parse_command_line(int argc, const char* argv[]) ;
00423    void run() ;
00424    ~ContextBasedSimulation() ;
00425 
00426 private :
00427    // The different actions performed by this program
00428    typedef void (ContextBasedSimulation::*Action)() ;
00429    typedef std::map<std::string, Action> ActionMap ;
00430    ActionMap action_map ;
00431 
00432    void accumulate_training_vectors() ; // for input to PCA
00433    void compute_pca_matrix() ;          // using training vectors
00434    void compute_training_vectors() ;    // using PCA matrix
00435    void compute_segment_means() ;       // using training vectors
00436    void classify_input_images() ;       // using PCA matrix & segment means
00437 
00438    // Accessors for retrieving some of the command line arguments
00439    std::string training_vectors_file() {return tv_option.getVal() ;}
00440    std::string pca_matrix_file()   {return pm_option.getVal() ;}
00441    std::string training_set()      {return ts_option.getVal() ;}
00442    std::string training_database() {return td_option.getVal() ;}
00443    std::string results_file()      {return rf_option.getVal() ;}
00444    std::string image_name()        {return in_option.getVal() ;}
00445    std::string segment_number()    {return sn_option.getVal() ;}
00446 } ;
00447 
00448 // On instantiation, create the model manager and the simulation's
00449 // various components.
00450 ContextBasedSimulation::ContextBasedSimulation(const std::string& model_name)
00451    : model_manager(model_name),
00452      configurator(new SimEventQueueConfigurator(model_manager)),
00453      brain(new StdBrain(model_manager)),
00454      input_frame_series(new SimInputFrameSeries(model_manager)),
00455      tv_option(& OPT_TrainingVectors, & model_manager),
00456      pm_option(& OPT_PCAMatrix, & model_manager),
00457      ts_option(& OPT_TrainingSet, & model_manager),
00458      td_option(& OPT_TrainingDB, & model_manager),
00459      rf_option(& OPT_ResultsFile, & model_manager),
00460      in_option(& OPT_ImageName, & model_manager),
00461      sn_option(& OPT_SegmentNumber, & model_manager)
00462 {
00463    model_manager.addSubComponent(configurator) ;
00464    model_manager.addSubComponent(brain) ;
00465    model_manager.addSubComponent(input_frame_series) ;
00466 
00467    typedef ContextBasedSimulation me ; // typing shortcut
00468    action_map[GECB_ACCUMULATE_CMD] = & me::accumulate_training_vectors ;
00469    action_map[GECB_PCA_CMD]        = & me::compute_pca_matrix ;
00470    action_map[GECB_TRAIN_CMD]      = & me::compute_training_vectors ;
00471    action_map[GECB_MEANS_CMD]      = & me::compute_segment_means ;
00472    action_map[GECB_CLASSIFY_CMD]   = & me::classify_input_images ;
00473 }
00474 
00475 // TODO: Do we really need the single channel save raw maps option for
00476 // this texton training program? And how can we force the gist estimator
00477 // type to be always GistEstimatorTexton? This program doesn't make sense
00478 // for any other gist estimator.
00479 void ContextBasedSimulation::parse_command_line(int argc, const char* argv[])
00480 {
00481    model_manager.setOptionValString(& OPT_SingleChannelSaveRawMaps, "true") ;
00482    model_manager.setOptionValString(& OPT_GistEstimatorType, "ContextBased") ;
00483    model_manager.setOptionValString(& OPT_NumOrientations, "6") ;
00484 
00485    model_manager.setOptionValString(& OPT_TrainingVectors,
00486                                     GECB_DEFAULT_TRAINING_VECTORS_FILE) ;
00487    model_manager.setOptionValString(& OPT_PCAMatrix,
00488                                     GECB_DEFAULT_PCA_MATRIX_FILE) ;
00489    model_manager.setOptionValString(& OPT_TrainingSet,
00490                                     GECB_DEFAULT_TRAINING_SET) ;
00491    model_manager.setOptionValString(& OPT_TrainingDB,
00492                                     GECB_DEFAULT_TRAINING_DATABASE) ;
00493    model_manager.setOptionValString(& OPT_ResultsFile,
00494                                     GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE) ;
00495    model_manager.setOptionValString(& OPT_ImageName,
00496                                     GECB_DEFAULT_IMAGE_NAME) ;
00497    model_manager.setOptionValString(& OPT_SegmentNumber,
00498                                     GECB_DEFAULT_SEGMENT_NUMBER) ;
00499 
00500    if (! model_manager.parseCommandLine(argc, argv, GECB_ACTIONS, 1, 1))
00501       throw std::runtime_error("command line parse error") ;
00502 }
00503 
00504 // To run the simulation, we simply dispatch to the function
00505 // corresponding to the action (non-option) command line argument.
00506 void ContextBasedSimulation::run()
00507 {
00508    std::string cmd(model_manager.getExtraArg(0)) ;
00509    ActionMap::iterator action = action_map.find(cmd) ;
00510    if (action == action_map.end())
00511       throw std::runtime_error(cmd + ": sorry, unknown action") ;
00512    (this->*(action->second))() ;
00513 }
00514 
00515 // Quick helper class to start and stop model manager (useful when
00516 // exceptions are thrown because destructor automatically stops the model
00517 // manager without requiring an explicit call to the stop method prior to
00518 // throwing the exception).
00519 class ModelManagerStarter {
00520    ModelManager& mgr ;
00521 public :
00522    ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;}
00523    ~ModelManagerStarter() {mgr.stop() ;}
00524 } ;
00525 
00526 // This method implements the simulation's main loop for the "accumulate"
00527 // action, which evolves the different components of the simulation and
00528 // saves the raw (384-dimensional) gist vectors to the training vectors
00529 // file.
00530 void ContextBasedSimulation::accumulate_training_vectors()
00531 {
00532    ModelManagerStarter M(model_manager) ;
00533 
00534    LFATAL("fixme");
00535    nub::soft_ref<GistEstimatorContextBased> ge;///// =
00536    ///////      dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ;
00537    if (ge.isInvalid())
00538       throw std::runtime_error("can only use GistEstimatorContextBased") ;
00539 
00540    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
00541    for(;;)
00542    {
00543       try
00544       {
00545          SeC<SimEventGistOutput> gist_out =
00546             event_queue->check<SimEventGistOutput>(brain.get(),
00547                                                    SEQ_UNMARKED | SEQ_MARK,
00548                                                    ge.get()) ;
00549          if (gist_out) // gist vector waiting to be picked up
00550             append(ge->getGist(), training_vectors_file()) ;
00551          if (event_queue->evolve() != SIM_CONTINUE)
00552             break ;
00553       }
00554       catch (lfatal_exception&) // if we seek beyond end of frame series
00555       {
00556          return ; // prevent LFATAL induced abortion
00557       }
00558    }
00559 }
00560 
00561 // The following method implements the "pca" action of this program,
00562 // which produces the PCA transformation matrix that allows us to reduce
00563 // the dimensionality of gist vectors from 384 to 80.
00564 void ContextBasedSimulation::compute_pca_matrix()
00565 {
00566    LINFO("MVN: counting lines in %s", training_vectors_file().c_str()) ;
00567    const int cols = count_lines(training_vectors_file()) ;
00568    const int rows = static_cast<int>(GistEstimatorContextBased::NUM_FEATURES) ;
00569 
00570    LINFO("MVN: reading %d training vectors from %s",
00571          cols, training_vectors_file().c_str()) ;
00572    OpenCVMatrix training_vectors =
00573       load_training_vectors(training_vectors_file(), rows, cols) ;
00574 
00575    LINFO("MVN: doing PCA on training vectors to get %d principal components",
00576          GECB_NUM_PRINCIPAL_COMPONENTS) ;
00577    OpenCVMatrix pca_matrix = pca(training_vectors,
00578                                  GECB_NUM_PRINCIPAL_COMPONENTS) ;
00579 
00580    LINFO("MVN: PCA done; saving PCA transformation matrix to %s",
00581          pca_matrix_file().c_str()) ;
00582    save(pca_matrix, pca_matrix_file()) ;
00583 }
00584 
00585 // This method implements the "train" action of this program. Like the
00586 // accumulate action, it implements a "main loop" for the simulation,
00587 // evolving different components with each iteration. But rather than
00588 // saving raw gist vectors to a file, it uses the transformation matrix
00589 // computed by the "pca" action to reduce the dimensionality of the raw
00590 // gist vectors from 384 to 80 and then saves these 80-dimensional
00591 // vectors to the training database.
00592 void ContextBasedSimulation::compute_training_vectors()
00593 {
00594    ModelManagerStarter M(model_manager) ;
00595 
00596    LFATAL("fixme");
00597    nub::soft_ref<GistEstimatorContextBased> ge;///// =
00598    ////////      dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ;
00599    if (ge.isInvalid())
00600       throw std::runtime_error("can only use GistEstimatorContextBased") ;
00601 
00602    Image<double> W = load_image(pca_matrix_file(),
00603                                 GECB_NUM_PRINCIPAL_COMPONENTS,
00604                                 count_lines(pca_matrix_file())) ;
00605    LINFO("MVN: loaded %dx%d PCA transformation matrix from %s",
00606          W.getHeight(), W.getWidth(), pca_matrix_file().c_str()) ;
00607 
00608    int i = 1 ;
00609    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
00610    for(;;)
00611    {
00612       try
00613       {
00614          SeC<SimEventGistOutput> gist_out =
00615             event_queue->check<SimEventGistOutput>(brain.get(),
00616                                                    SEQ_UNMARKED | SEQ_MARK,
00617                                                    ge.get()) ;
00618          if (gist_out) // gist vector waiting to be picked up
00619             append(vmMult(ge->getGist(), W),
00620                    training_set(),
00621                    image_name() + to_string(i++) + " " + segment_number()) ;
00622          if (event_queue->evolve() != SIM_CONTINUE)
00623             break ;
00624       }
00625       catch (lfatal_exception&) // if we seek beyond end of frame series
00626       {
00627          return ; // prevent LFATAL induced abortion
00628       }
00629    }
00630 }
00631 
00632 // Once we have the training set, we compute the segment means by
00633 // looking at the segment number for each image in the training set and
00634 // summing its gist vector to the running total for that segment. At the
00635 // end of this process, we simply divide the totals for each segment by
00636 // the count of gist vectors encountered for that segment.
00637 //
00638 // This averaging operation is facilitated by a map that associates a
00639 // segment number with a gist vector sum and a corresponding count.
00640 void ContextBasedSimulation::compute_segment_means()
00641 {
00642    typedef std::pair<int, GistVector> SegmentInfo ; // count and sum
00643    typedef std::map<int, SegmentInfo> MeansMap ;
00644    MeansMap means ;
00645 
00646    LINFO("reading training set data from %s", training_set().c_str()) ;
00647    std::ifstream ifs(training_set().c_str()) ;
00648    for(;;)
00649    {
00650       try
00651       {
00652          std::istringstream line(getline(ifs)) ;
00653 
00654          std::string name ; int segment ;
00655          GistVector G(GECB_NUM_PRINCIPAL_COMPONENTS, 1, ZEROS) ;
00656          line >> name >> segment >> G ;
00657 
00658          MeansMap::iterator it = means.find(segment) ;
00659          if (it == means.end())
00660          {
00661             means.insert(std::make_pair(segment, SegmentInfo(1, G))) ;
00662          }
00663          else
00664          {
00665             SegmentInfo& I = it->second ;
00666             ++I.first ; // count
00667             I.second += G ; // gist vector sum
00668          }
00669       }
00670       catch (std::exception&) // ought to be just EOF for ifs
00671       {
00672          break ;
00673       }
00674    }
00675 
00676    LINFO("computing gist vector averages for each segment and saving to %s",
00677          training_database().c_str()) ;
00678    std::ofstream ofs(training_database().c_str()) ;
00679    for (MeansMap::iterator it = means.begin(); it != means.end(); ++it)
00680    {
00681       SegmentInfo& I = it->second ;
00682       I.second /= I.first ; // gist vector totals by num vectors for segment
00683       ofs << it->first << ' ' << I.second ;
00684    }
00685 }
00686 
00687 // The following method implements this program's "classify" action. It
00688 // loads the PCA transformation matrix and the training vectors database
00689 // and then uses a simple Euclidean distance measure to compute the
00690 // closest match for the input image.
00691 void ContextBasedSimulation::classify_input_images()
00692 {
00693    ModelManagerStarter M(model_manager) ;
00694 
00695    LFATAL("fixme");
00696    nub::soft_ref<GistEstimatorContextBased> ge;///////// =
00697    //////      dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ;
00698    if (ge.isInvalid())
00699       throw std::runtime_error("can only use GistEstimatorContextBased") ;
00700 
00701    Image<double> W = load_image(pca_matrix_file(),
00702                                 GECB_NUM_PRINCIPAL_COMPONENTS,
00703                                 count_lines(pca_matrix_file())) ;
00704    LINFO("MVN: loaded %dx%d PCA transformation matrix from %s",
00705          W.getHeight(), W.getWidth(), pca_matrix_file().c_str()) ;
00706 
00707    TrainingDB training_db = load_training_database(training_database()) ;
00708    LINFO("MVN: loaded %d training vectors from %s",
00709          int(training_db.size()), training_database().c_str()) ;
00710 
00711    int i = 1 ;
00712    nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
00713    for(;;)
00714    {
00715       try
00716       {
00717          SeC<SimEventGistOutput> gist_out =
00718             event_queue->check<SimEventGistOutput>(brain.get(),
00719                                                    SEQ_UNMARKED | SEQ_MARK,
00720                                                    ge.get()) ;
00721          if (gist_out) // gist vector waiting to be picked up
00722             classify_image(std::make_pair(image_name() + to_string(i++),
00723                                           vmMult(ge->getGist(), W)),
00724                            training_db, results_file()) ;
00725          if (event_queue->evolve() != SIM_CONTINUE)
00726             break ;
00727       }
00728       catch (lfatal_exception&) // if we seek beyond end of frame series
00729       {
00730          return ; // prevent LFATAL induced abortion
00731       }
00732    }
00733 }
00734 
00735 // Do we really not have to delete the configurator, brain and input
00736 // frame series? If it turns out we do, this empty destructor will have
00737 // to be filled out with the necessary delete calls...
00738 ContextBasedSimulation::~ContextBasedSimulation() {}
00739 
00740 } // end of local namespace encapsulating simulation encapsulation section
00741 
00742 //------------------------------- MAIN ----------------------------------
00743 
00744 #ifdef HAVE_OPENCV
00745 
00746 int main(int argc, const char* argv[])
00747 {
00748    MYLOGVERB = LOG_INFO ; // suppress debug messages
00749    try
00750    {
00751       ContextBasedSimulation S("train-gecb Model") ;
00752       S.parse_command_line(argc, argv) ;
00753       S.run() ;
00754    }
00755    catch (std::exception& e)
00756    {
00757      LFATAL("%s", e.what()) ;
00758       return 1 ;
00759    }
00760    return 0 ;
00761 }
00762 
00763 #else
00764 
00765 int main()
00766 {
00767    LINFO("Sorry, this program needs OpenCV.") ;
00768    return 1 ;
00769 }
00770 
00771 #endif
00772 
00773 //-------------------------------- PCA ----------------------------------
00774 
00775 namespace {
00776 
00777 // Create the MxN data matrix for PCA from the training vectors file.
00778 // Each line of this file becomes a column of the data matrix.
00779 CvMat* load_training_vectors(const std::string& file_name, int M, int N)
00780 {
00781    CvMat* data = cvCreateMat(M, N, CV_32FC1) ;
00782 
00783    double d ;
00784    std::ifstream ifs(file_name.c_str()) ;
00785    for (int j = 0; j < N; ++j)
00786       for (int i = 0; i < M; ++i) { // column-major reading
00787          if (! ifs) {
00788             cvReleaseMat(& data) ;
00789             throw std::runtime_error(file_name + ": out of data?!?") ;
00790          }
00791          ifs >> d ;
00792          cvmSet(data, i, j, d) ;
00793       }
00794 
00795    return data ;
00796 }
00797 
00798 // Return the PCA transformation matrix that will allow extraction of the
00799 // D principal components from input data.
00800 CvMat* pca(const OpenCVMatrix& data, int D)
00801 {
00802    OpenCVMatrix means = cvCreateMat(data.num_rows(), 1, CV_32FC1) ;
00803    OpenCVMatrix eigenvalues = cvCreateMat(D, 1, CV_32FC1) ;
00804    OpenCVMatrix eigenvectors = cvCreateMat(D, data.num_rows(), CV_32FC1) ;
00805 
00806    cvCalcPCA(data, means, eigenvalues, eigenvectors, CV_PCA_DATA_AS_COL) ;
00807 
00808    CvMat* pca_matrix = cvCreateMat(data.num_rows(), D, CV_32FC1) ;
00809    cvTranspose(eigenvectors, pca_matrix) ;
00810    return pca_matrix ;
00811 }
00812 
00813 } // end of local namespace encapsulating training vectors section
00814 
00815 //----------------------- IMAGE CLASSIFICATION --------------------------
00816 
00817 // Given the 80-D gist vectors for an input image and each of the
00818 // training segments, we can tell which training segment the input image
00819 // matches most closely by performing a Euclidean distance check between
00820 // the input image's gist vector and the vectors of each of the training
00821 // segments.
00822 namespace {
00823 
// A (training segment number, distance) pair. It records how far the
// input image's gist vector is from a given training segment's gist
// vector so that the closest training segments can be identified after
// all the distances have been computed.
typedef std::pair<int, double> SegmentDistance ;

// Ordering predicate for sorting segment distances: only the distance
// (second) takes part in the comparison; the segment number (first) is
// ignored.
bool distance_cmp(const SegmentDistance& L, const SegmentDistance& R)
{
   const double left_distance  = L.second ;
   const double right_distance = R.second ;
   return left_distance < right_distance ;
}
00837 
00838 // But when writing classification results, we're only interested in the
00839 // matching training segment's number and not really in the Euclidean
00840 // distance between its gist vector and that of the input image.
00841 std::ostream& operator<<(std::ostream& os, const SegmentDistance& D)
00842 {
00843    return os << D.first ;
00844 }
00845 
00846 // Given an entry from the training database, the following function
00847 // object returns the Euclidean distance between the supplied input
00848 // image's gist vector and the training segment's gist vector.
00849 struct euclidean_distance
00850    : public std::binary_function<TrainingDBEntry, GistVector, SegmentDistance>
00851 {
00852    SegmentDistance
00853    operator()(const TrainingDBEntry& E, const GistVector& input) const {
00854       return std::make_pair(E.first, distance<float>(input, E.second)) ;
00855    }
00856 } ;
00857 
00858 // This function computes the Euclidean distance between the input
00859 // image's 80-D gist vector and each of the 80-D gist vectors for the
00860 // training segments and then writes the top five matches to the
00861 // specified results file.
00862 //
00863 // DEVNOTE: To output the top five matches to the results file, we ought
00864 // to be able to use the std::copy algorithm in conjunction with
00865 // std::ostream_iterator<SegmentDistance>. Unfortunately, ostream
00866 // iterators cannot be used with user-defined types. This is entirely in
00867 // keeping with C++'s philosophy of sucking ass most of the time but
00868 // sucking ass big-time only every now and then.
00869 void classify_image(const InputImageData& input, const TrainingDB& db,
00870                     const std::string& results_file)
00871 {
00872    std::vector<SegmentDistance> distances ;
00873    std::transform(db.begin(), db.end(), std::back_inserter(distances),
00874                   std::bind2nd(euclidean_distance(), input.second)) ;
00875    std::sort(distances.begin(), distances.end(), distance_cmp) ;
00876 
00877    std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ;
00878    ofs << input.first << ' ' ;
00879    //std::copy(distances.begin(), distances.begin() + 5,
00880              //std::ostream_iterator<SegmentDistance>(ofs, " ")) ; // ERROR!
00881    for (unsigned int i = 0; i < distances.size() && i < 5; ++i)
00882       ofs << distances[i] << ' ' ;
00883    ofs << '\n' ;
00884 }
00885 
00886 } // end of local namespace encapsulating image classification section
00887 
00888 //-------------------------------- I/O ----------------------------------
00889 
00890 namespace {
00891 
00892 // Save an OpenCV matrix to the specified file
00893 void save(const OpenCVMatrix& M, const std::string& file_name)
00894 {
00895    std::ofstream ofs(file_name.c_str()) ;
00896    for (int i = 0; i < M.num_rows(); ++i) {
00897       for (int j = 0; j < M.num_cols(); ++j)
00898          ofs << M.get<float>(i, j) << ' ' ;
00899       ofs << '\n' ;
00900    }
00901 }
00902 
00903 // Append an Image to the specified file, optionally writing an image
00904 // name to the first row.
00905 void append(const Image<double>& I, const std::string& file_name,
00906             const std::string& image_name)
00907 {
00908    if (! I.initialized())
00909       throw std::runtime_error("save empty image to " + file_name + "?!?") ;
00910    if (file_name.empty())
00911       throw std::runtime_error("must specify file name for saving Image") ;
00912 
00913    std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ;
00914    if (! image_name.empty())
00915       ofs << image_name << ' ' ;
00916    ofs << I ;
00917 }
00918 
00919 // Load Image stored in a file
00920 Image<double>
00921 load_image(const std::string& file_name, int width, int height)
00922 {
00923    try
00924    {
00925       Image<double> I(width, height, NO_INIT) ;
00926       std::ifstream ifs(file_name.c_str()) ;
00927       ifs >> I ;
00928       return I ;
00929    }
00930    catch (std::exception&)
00931    {
00932       throw std::runtime_error(file_name + ": out of data?!?") ;
00933    }
00934 }
00935 
00936 // Stream insertion operator for an Image<double>
00937 std::ostream& operator<<(std::ostream& os, const Image<double>& I)
00938 {
00939    for (int y = 0; y < I.getHeight(); ++y) {
00940       for (int x = 0; x < I.getWidth(); ++x)
00941          os << I.getVal(x, y) << ' ' ;
00942       os << '\n' ;
00943    }
00944    return os ;
00945 }
00946 
00947 // The following extraction operator reads in an Image<double> from the
00948 // supplied input stream.
00949 //
00950 // WARNING: It *assumes* that the image has already allocated some memory
00951 // for itself and that the input stream has those many elements.
00952 std::istream& operator>>(std::istream& is, Image<double>& I)
00953 {
00954    double d ;
00955    for (int y = 0; y < I.getHeight(); ++y)
00956       for (int x = 0; x < I.getWidth(); ++x)
00957          if (is >> d)
00958             I.setVal(x, y, d) ;
00959          else
00960             throw std::runtime_error("not enough data for Image<double>?!?") ;
00961    return is ;
00962 }
00963 
00964 // The following function reads the training "database," which is a plain
00965 // text file containing one entry per line. Each line starts with the
00966 // segment number and then come the eighty numbers making up that
00967 // segment's mean gist vector.
00968 TrainingDB load_training_database(const std::string& file_name)
00969 {
00970    TrainingDB db ;
00971 
00972    std::ifstream ifs(file_name.c_str()) ;
00973    for(;;)
00974    {
00975       try
00976       {
00977          std::istringstream line(getline(ifs)) ;
00978 
00979          int segment ;
00980          GistVector G(GECB_NUM_PRINCIPAL_COMPONENTS, 1, ZEROS) ;
00981          line >> segment >> G ;
00982 
00983          db.insert(std::make_pair(segment, G)) ;
00984       }
00985       catch (std::exception&) // ought only to happen on ifs EOF
00986       {
00987          break ;
00988       }
00989    }
00990 
00991    return db ;
00992 }
00993 
// Fetch the next line from the input stream and hand it back as a
// string. A std::runtime_error is thrown when the read fails or when
// the line obtained is empty; callers treat either case as the end of
// their input.
std::string getline(std::istream& is)
{
   std::string text ;
   if (! std::getline(is, text) || text.empty()) // EOF
      throw std::runtime_error("unable to read from input stream") ;
   return text ;
}
01003 
} // end of local namespace encapsulating I/O section
01005 
01006 //-------------------------- UTILITY ROUTINES ---------------------------
01007 
01008 namespace {
01009 
// Convert a value of (almost) any type to its textual representation by
// writing it to an in-memory output stream. Convenient, though perhaps
// not the most efficient way of doing this.
//
// DEVNOTE: Type T must provide an operator << that writes to an ostream.
template<typename T>
std::string to_string(const T& t)
{
   std::ostringstream buffer ;
   buffer << t ;
   const std::string text = buffer.str() ;
   return text ;
}
01022 
// Count the number of lines in a file. A final line lacking a trailing
// newline is counted too.
//
// Returns zero for an empty file and also for a file that cannot be
// opened (rather than -1, which callers would end up using as a size).
int count_lines(const std::string& file_name)
{
   std::ifstream ifs(file_name.c_str()) ;

   int n = 0 ;
   std::string dummy ;
   while (std::getline(ifs, dummy)) // true only if a line was really read
      ++n ;
   return n ;
}
01036 
01037 } // end of local namespace encapsulating utility routines section
01038 
01039 //-----------------------------------------------------------------------
01040 
01041 /* So things look consistent in everyone's emacs... */
01042 /* Local Variables: */
01043 /* indent-tabs-mode: nil */
01044 /* End: */