00001 /*! 00002 \file Gist/train-gecb.C 00003 \brief Train and classify images using the context-based gist 00004 estimator. 00005 */ 00006 00007 // //////////////////////////////////////////////////////////////////// // 00008 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00009 // University of Southern California (USC) and the iLab at USC. // 00010 // See http://iLab.usc.edu for information about this project. // 00011 // //////////////////////////////////////////////////////////////////// // 00012 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00013 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00014 // in Visual Environments, and Applications'' by Christof Koch and // 00015 // Laurent Itti, California Institute of Technology, 2001 (patent // 00016 // pending; application number 09/912,225 filed July 23, 2001; see // 00017 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00018 // //////////////////////////////////////////////////////////////////// // 00019 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00020 // // 00021 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00022 // redistribute it and/or modify it under the terms of the GNU General // 00023 // Public License as published by the Free Software Foundation; either // 00024 // version 2 of the License, or (at your option) any later version. // 00025 // // 00026 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00027 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00028 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00029 // PURPOSE. See the GNU General Public License for more details. 
// 00030 // // 00031 // You should have received a copy of the GNU General Public License // 00032 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00033 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00034 // Boston, MA 02111-1307 USA. // 00035 // //////////////////////////////////////////////////////////////////// // 00036 // 00037 // Primary maintainer for this file: Manu Viswanathan <mviswana at usc dot edu> 00038 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-gecb.C $ 00039 // $Id: train-gecb.C 14755 2011-04-29 05:55:18Z itti $ 00040 // 00041 00042 //------------------------------ HEADERS -------------------------------- 00043 00044 #include "Image/OpenCVUtil.H" // must be first to avoid conflicting defs of int64, uint64 00045 00046 // Gist specific headers 00047 #include "Neuro/GistEstimatorContextBased.H" 00048 00049 // Other INVT headers 00050 #include "Neuro/StdBrain.H" 00051 #include "Neuro/NeuroOpts.H" 00052 #include "Neuro/NeuroSimEvents.H" 00053 00054 #include "Media/SimFrameSeries.H" 00055 #include "Media/MediaOpts.H" 00056 00057 #include "Simulation/SimEventQueue.H" 00058 #include "Simulation/SimEventQueueConfigurator.H" 00059 00060 #include "Channels/ChannelOpts.H" 00061 #include "Component/ModelManager.H" 00062 #include "Component/ModelOptionDef.H" 00063 00064 #include "Image/MathOps.H" 00065 #include "Image/MatrixOps.H" 00066 #include "Image/Point2D.H" 00067 00068 #include "nub/ref.h" 00069 00070 #ifndef HAVE_OPENCV // fake OpenCV API so as to not break builds 00071 00072 namespace { 00073 00074 struct CvMat {int rows, cols, type ;} ; 00075 00076 inline CvMat* cvCreateMat(int, int, int) {return 0 ;} 00077 inline void cvReleaseMat(CvMat**) {} 00078 inline double cvmGet(CvMat*, int, int) {return 0 ;} 00079 inline void cvmSet(CvMat*, int, int, double) {} 00080 inline void cvTranspose(const CvMat*, CvMat*) {} 00081 00082 #define CV_32FC1 0 00083 inline int CV_MAT_TYPE(int) 
{return 0 ;} 00084 #define CV_MAT_ELEM(matrix, type, row, col) (type(0)) 00085 00086 #define CV_PCA_DATA_AS_COL 0 00087 inline void cvCalcPCA(const CvMat*, CvMat*, CvMat*, CvMat*, int) {} 00088 00089 } 00090 00091 #endif // OpenCV availability check 00092 00093 // Standard C++ headers 00094 #include <fstream> 00095 #include <sstream> 00096 #include <ios> 00097 #include <numeric> 00098 #include <algorithm> 00099 #include <functional> 00100 #include <map> 00101 #include <vector> 00102 #include <iterator> 00103 #include <stdexcept> 00104 #include <utility> 00105 00106 //------------------------------ DEFINES -------------------------------- 00107 00108 // "Raw" gist vectors are collections of 384 numbers. To reduce the 00109 // dimensionality of these vectors to make them faster to compare, etc. 00110 // we use PCA and extract the following number of principal components. 00111 #ifndef GECB_NUM_PRINCIPAL_COMPONENTS 00112 #define GECB_NUM_PRINCIPAL_COMPONENTS 80 00113 #endif 00114 00115 //----------------------- FORWARD DECLARATIONS -------------------------- 00116 00117 namespace { 00118 00119 // Some useful types for dealing with gist training vectors 00120 typedef Image<double> GistVector ; 00121 typedef std::map<int, GistVector> TrainingDB ; 00122 typedef TrainingDB::value_type TrainingDBEntry ; 00123 00124 // PCA 00125 class OpenCVMatrix ; 00126 CvMat* load_training_vectors(const std::string& file_name, int M, int N) ; 00127 CvMat* pca(const OpenCVMatrix& data, int num_principal_components) ; 00128 00129 // Image classification 00130 typedef std::pair<std::string, GistVector> InputImageData ; 00131 void classify_image(const InputImageData&, const TrainingDB&, 00132 const std::string& results_file) ; 00133 00134 // I/O 00135 void save(const OpenCVMatrix&, const std::string& file_name) ; 00136 00137 void append(const Image<double>&, 00138 const std::string& file_name, 00139 const std::string& image_name = std::string()) ; 00140 Image<double> load_image(const 
std::string& file_name, 00141 int width, int height) ; 00142 std::ostream& operator<<(std::ostream&, const Image<double>&) ; 00143 std::istream& operator>>(std::istream&, Image<double>&) ; 00144 00145 TrainingDB load_training_database(const std::string& file_name) ; 00146 00147 std::string getline(std::istream&) ; 00148 00149 // Utilities 00150 int count_lines(const std::string& file_name) ; 00151 template<typename T> std::string to_string(const T&) ; 00152 00153 } 00154 00155 //-------------------------- OPENCV MATRICES ---------------------------- 00156 00157 // Crude encapsulation of OpenCV matrices 00158 namespace { 00159 00160 class OpenCVMatrix { 00161 CvMat* matrix ; 00162 public : 00163 OpenCVMatrix(int num_rows, int num_cols, int type) ; 00164 OpenCVMatrix(CvMat*) ; 00165 ~OpenCVMatrix() ; 00166 00167 int num_rows() const {return matrix->rows ;} 00168 int num_cols() const {return matrix->cols ;} 00169 int type() const {return CV_MAT_TYPE(matrix->type) ;} 00170 00171 template<typename T> // T must match matrix->type (float for CV_32FC1, etc.) 00172 T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;} 00173 00174 operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea) 00175 } ; 00176 00177 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type) 00178 : matrix(cvCreateMat(num_rows, num_cols, type)) 00179 { 00180 if (! matrix) 00181 throw std::runtime_error("unable to create OpenCV matrix") ; 00182 } 00183 00184 OpenCVMatrix::OpenCVMatrix(CvMat* M) 00185 : matrix(M) 00186 { 00187 if (! 
matrix) 00188 throw std::runtime_error("cannot create empty/null matrix") ; 00189 } 00190 00191 OpenCVMatrix::~OpenCVMatrix() 00192 { 00193 cvReleaseMat(& matrix) ; 00194 } 00195 00196 } // end of local namespace encapsulating above helper 00197 00198 //----------------------- COMMAND LINE OPTIONS -------------------------- 00199 00200 // This program has four distinct phases/modes of operation, each one 00201 // specified via a suitable non-option command line argument. 00202 // Additionally, it supports several command line options to allow users 00203 // to tweak various parameters such as the name of the PCA matrix file, 00204 // training database, and so on. 00205 namespace { 00206 00207 const ModelOptionCateg MOC_GECB = { 00208 MOC_SORTPRI_3, 00209 "Options specific to the context-based gist program", 00210 } ; 00211 00212 // In the training vectors accumulation phase, we collect all the gist 00213 // vectors that will be used as the input to the PCA into a plain text 00214 // file. 00215 #ifndef GECB_DEFAULT_TRAINING_VECTORS_FILE 00216 #define GECB_DEFAULT_TRAINING_VECTORS_FILE "gecb_training_vectors.txt" 00217 #endif 00218 00219 const ModelOptionDef OPT_TrainingVectors = { 00220 MODOPT_ARG_STRING, "TrainingVectors", & MOC_GECB, OPTEXP_CORE, 00221 "This option specifies the name of the file where the training vectors\n" 00222 "should be accumulated or read from. This is a plain text file containing\n" 00223 "the training vectors matrix that will be fed into the PCA procedure.\n" 00224 "Each line of this file will contain a row of \"raw\" (i.e., 384-\n" 00225 "dimensional) gist vectors. 
For PCA, these rows will be read in as the\n" 00226 "columns of the data matrix.\n", 00227 "training-vectors", '\0', "training-vectors-file", 00228 GECB_DEFAULT_TRAINING_VECTORS_FILE, 00229 } ; 00230 00231 // For PCA, we use the accumulated training vectors, i.e., the 00232 // 384-dimensional "raw" gist vectors, and produce the transformation 00233 // matrix that will convert these raw gist vectors into 80-dimensional 00234 // vectors. This transformation matrix is stored in the following file. 00235 #ifndef GECB_DEFAULT_PCA_MATRIX_FILE 00236 #define GECB_DEFAULT_PCA_MATRIX_FILE "gecb_pca_matrix.txt" 00237 #endif 00238 00239 const ModelOptionDef OPT_PCAMatrix = { 00240 MODOPT_ARG_STRING, "PCAMatrix", & MOC_GECB, OPTEXP_CORE, 00241 "This option specifies the name of the file in which the 384x80 PCA\n" 00242 "transformation matrix is (or will be) stored. \"Raw\" 384-dimensional\n" 00243 " gist vectors can be reduced to 80 dimensions by muliplying with this\n" 00244 "matrix. The PCA transformation matrix is stored in a plain text file\n" 00245 "with each line containing one row of the matrix.\n", 00246 "pca-matrix", '\0', "pca-matrix-file", 00247 GECB_DEFAULT_PCA_MATRIX_FILE, 00248 } ; 00249 00250 // In the second phase of training, we use the PCA transformation matrix 00251 // to output the 80-dimensional gist vectors of the training images and 00252 // store them in a training set under the specified "entry name" and 00253 // segment number. 00254 // 00255 // DEVNOTE: The default values for these options are not very useful. 00256 // They really ought to be specified on the command line. 
// Fallback values for --image-name and --segment-number. Both macros can
// be overridden at compile time; both are string literals because the
// corresponding options are declared with MODOPT_ARG_STRING.
#ifndef GECB_DEFAULT_IMAGE_NAME
   #define GECB_DEFAULT_IMAGE_NAME "some_image"
#endif
#ifndef GECB_DEFAULT_SEGMENT_NUMBER
   #define GECB_DEFAULT_SEGMENT_NUMBER "0"
#endif

// --image-name: root name for entries written to the training set or
// the results file (defaults to GECB_DEFAULT_IMAGE_NAME).
const ModelOptionDef OPT_ImageName = {
   MODOPT_ARG_STRING, "ImageName", & MOC_GECB, OPTEXP_CORE,
   "This option specifies the \"root\" name of an entry in the training\n"
   "set or the results file. The image number will be automatically\n"
   "appended to this \"root\" name. The training database is a plain text\n"
   "file containing one entry per line. The first field specifies the name\n"
   "plus number of the entry (e.g., foo.1, foo.2, bar.1, and so on). The\n"
   "second field is the segment number for this image. The remaining fields\n"
   "are the 80 numbers making up the image's gist vector.\n\n"
   "In classification mode, this option specifies the name of the input\n"
   "image's gist vector that is written to the results file.\n",
   "image-name", '\0', "image-name-root",
   GECB_DEFAULT_IMAGE_NAME,
} ;

// --segment-number: segment label attached to training-set entries
// (defaults to GECB_DEFAULT_SEGMENT_NUMBER). It is kept as a string
// option even though the value is a number.
const ModelOptionDef OPT_SegmentNumber = {
   MODOPT_ARG_STRING, "SegmentNumber", & MOC_GECB, OPTEXP_CORE,
   "This option specifies the segment number for an image in the training\n"
   "set. The segment number is used in the third phase of training to\n"
   "compute the mean 80-D gist vectors for each segment and during\n"
   "classification to ascertain the segment number for each input image.\n",
   "segment-number", '\0', "image-segment-number",
   GECB_DEFAULT_SEGMENT_NUMBER,
} ;

// The output of the second phase of training, i.e., the 80-D gist
// vectors for each of the training images, is sent to the following
// file.
00292 #ifndef GECB_DEFAULT_TRAINING_SET 00293 #define GECB_DEFAULT_TRAINING_SET "gecb_training_set.txt" 00294 #endif 00295 00296 const ModelOptionDef OPT_TrainingSet = { 00297 MODOPT_ARG_STRING, "TrainingSet", & MOC_GECB, OPTEXP_CORE, 00298 "This option specifies the name of the training set, a plain text\n" 00299 "file containing one entry per line. The first field specifies the name\n" 00300 "plus number of the entry (e.g., foo.1, foo.2, bar.1, and so on). The\n" 00301 "second field is the segment number for this image. And the remaining\n" 00302 "fields are the 80 numbers that make up the image's gist vector.\n", 00303 "training-set", '\0', "training-set-file", 00304 GECB_DEFAULT_TRAINING_SET, 00305 } ; 00306 00307 // The training database is a plain text file that specifies the mean 00308 // gist vector for each segment. 00309 #ifndef GECB_DEFAULT_TRAINING_DATABASE 00310 #define GECB_DEFAULT_TRAINING_DATABASE "gecb_training_db.txt" 00311 #endif 00312 00313 const ModelOptionDef OPT_TrainingDB = { 00314 MODOPT_ARG_STRING, "TrainingDB", & MOC_GECB, OPTEXP_CORE, 00315 "This option specifies the name of the training database, a plain text\n" 00316 "file containing one entry per line. The first field specifies the\n" 00317 "segment number. And the remaining fields are the 80 numbers that make\n" 00318 "up the segment's mean gist vector.\n", 00319 "training-db", '\0', "training-db-file", 00320 GECB_DEFAULT_TRAINING_DATABASE, 00321 } ; 00322 00323 // In image classification mode, we write the results to the following 00324 // file. 00325 #ifndef GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE 00326 #define GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE "gecb_classifications.txt" 00327 #endif 00328 00329 const ModelOptionDef OPT_ResultsFile = { 00330 MODOPT_ARG_STRING, "ResultsFile", & MOC_GECB, OPTEXP_CORE, 00331 "This option specifies the name of the classification results file,\n" 00332 "a plain text file containing one result entry per line. 
The first\n" 00333 "field specifies the name plus number of the input image, (e.g., foo.1,\n" 00334 "foo.2, bar.1, and so on). Then come the numbers of the top five matching\n" 00335 "segments from the training database.\n", 00336 "results-file", '\0', "classification-results-file", 00337 GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE, 00338 } ; 00339 00340 // The different operational modes of this program must be specified as 00341 // the one and only non-option command line argument. This "action" 00342 // command must be one of the following strings (case-sensitive!): 00343 // 00344 // 1. accumulate -- accumulate the training vectors in the plain text 00345 // file specified by the --training-vectors option (default is to 00346 // accumulate the training vectors in ./gecb_training_vectors.txt. 00347 // 00348 // 2. pca -- compute the transformation matrix that will allow us to 00349 // reduce the 384-dimensional "raw" gist vectors to their 80 principal 00350 // components using the PCA support in OpenCV. 00351 // 00352 // The --training-vectors option can be used to specify the input file 00353 // for the PCA and --pca-matrix option can be used to specify the file 00354 // in which the transformation matrix should be saved. The defaults 00355 // are to read from ./gecb_training_vectors.txt and write to 00356 // ./gecb_pca_matrix.txt. 00357 // 00358 // 3. train -- compute the gist vectors for the training set. The output 00359 // is sent to the text file specified by the --training-set option. 00360 // It is a good idea to also supply the --image-name and 00361 // --segment-number options when saving training gist vectors from an 00362 // MPEG. A good choice of the entry's name would be the basename of 00363 // the MPEG file sans extension. 00364 // 00365 // 4. means -- given the 80-dimensional gist vectors for the training 00366 // images, this action computes the mean gist vectors for each 00367 // segment. 
The output is sent to the text file specified by the 00368 // --training-db option. 00369 // 00370 // 5. classify -- uses the PCA transformation matrix and training 00371 // database produced by the pca and means commands to classify the 00372 // input images streaming in. 00373 #ifndef GECB_ACCUMULATE_CMD 00374 #define GECB_ACCUMULATE_CMD "accumulate" 00375 #endif 00376 #ifndef GECB_PCA_CMD 00377 #define GECB_PCA_CMD "pca" 00378 #endif 00379 #ifndef GECB_TRAIN_CMD 00380 #define GECB_TRAIN_CMD "train" 00381 #endif 00382 #ifndef GECB_MEANS_CMD 00383 #define GECB_MEANS_CMD "means" 00384 #endif 00385 #ifndef GECB_CLASSIFY_CMD 00386 #define GECB_CLASSIFY_CMD "classify" 00387 #endif 00388 00389 // For printing usage info 00390 #ifndef GECB_ACTIONS 00391 #define GECB_ACTIONS ("{"GECB_ACCUMULATE_CMD"|"GECB_PCA_CMD"|"\ 00392 GECB_TRAIN_CMD"|"GECB_MEANS_CMD"|"\ 00393 GECB_CLASSIFY_CMD"}") 00394 #endif 00395 00396 } // end of local namespace encapsulating command line options section 00397 00398 //--------------------- SIMULATION ENCAPSULATION ------------------------ 00399 00400 namespace { 00401 00402 // The following helper class wraps around the ModelManager and 00403 // associated objects, providing a neatly encapsulated API for the main 00404 // program. 00405 class ContextBasedSimulation { 00406 ModelManager model_manager ; 00407 nub::soft_ref<SimEventQueueConfigurator> configurator ; 00408 nub::soft_ref<StdBrain> brain ; 00409 nub::ref<SimInputFrameSeries> input_frame_series ; 00410 00411 // Various command line options specific to this program 00412 OModelParam<std::string> tv_option ; // --training-vectors 00413 OModelParam<std::string> pm_option ; // --pca-matrix 00414 OModelParam<std::string> ts_option ; // --training-set 00415 OModelParam<std::string> td_option ; // --training-db 00416 OModelParam<std::string> rf_option ; // --results-file 00417 OModelParam<std::string> in_option ; // --image-name (not --in!) 
00418 OModelParam<std::string> sn_option ; // --segment-number 00419 00420 public : 00421 ContextBasedSimulation(const std::string& model_name) ; 00422 void parse_command_line(int argc, const char* argv[]) ; 00423 void run() ; 00424 ~ContextBasedSimulation() ; 00425 00426 private : 00427 // The different actions performed by this program 00428 typedef void (ContextBasedSimulation::*Action)() ; 00429 typedef std::map<std::string, Action> ActionMap ; 00430 ActionMap action_map ; 00431 00432 void accumulate_training_vectors() ; // for input to PCA 00433 void compute_pca_matrix() ; // using training vectors 00434 void compute_training_vectors() ; // using PCA matrix 00435 void compute_segment_means() ; // using training vectors 00436 void classify_input_images() ; // using PCA matrix & segment means 00437 00438 // Accessors for retrieving some of the command line arguments 00439 std::string training_vectors_file() {return tv_option.getVal() ;} 00440 std::string pca_matrix_file() {return pm_option.getVal() ;} 00441 std::string training_set() {return ts_option.getVal() ;} 00442 std::string training_database() {return td_option.getVal() ;} 00443 std::string results_file() {return rf_option.getVal() ;} 00444 std::string image_name() {return in_option.getVal() ;} 00445 std::string segment_number() {return sn_option.getVal() ;} 00446 } ; 00447 00448 // On instantiation, create the model manager and the simulation's 00449 // various components. 
00450 ContextBasedSimulation::ContextBasedSimulation(const std::string& model_name) 00451 : model_manager(model_name), 00452 configurator(new SimEventQueueConfigurator(model_manager)), 00453 brain(new StdBrain(model_manager)), 00454 input_frame_series(new SimInputFrameSeries(model_manager)), 00455 tv_option(& OPT_TrainingVectors, & model_manager), 00456 pm_option(& OPT_PCAMatrix, & model_manager), 00457 ts_option(& OPT_TrainingSet, & model_manager), 00458 td_option(& OPT_TrainingDB, & model_manager), 00459 rf_option(& OPT_ResultsFile, & model_manager), 00460 in_option(& OPT_ImageName, & model_manager), 00461 sn_option(& OPT_SegmentNumber, & model_manager) 00462 { 00463 model_manager.addSubComponent(configurator) ; 00464 model_manager.addSubComponent(brain) ; 00465 model_manager.addSubComponent(input_frame_series) ; 00466 00467 typedef ContextBasedSimulation me ; // typing shortcut 00468 action_map[GECB_ACCUMULATE_CMD] = & me::accumulate_training_vectors ; 00469 action_map[GECB_PCA_CMD] = & me::compute_pca_matrix ; 00470 action_map[GECB_TRAIN_CMD] = & me::compute_training_vectors ; 00471 action_map[GECB_MEANS_CMD] = & me::compute_segment_means ; 00472 action_map[GECB_CLASSIFY_CMD] = & me::classify_input_images ; 00473 } 00474 00475 // TODO: Do we really need the single channel save raw maps option for 00476 // this texton training program? And how can we force the gist estimator 00477 // type to be always GistEstimatorTexton? This program doesn't make sense 00478 // for any other gist estimator. 
00479 void ContextBasedSimulation::parse_command_line(int argc, const char* argv[]) 00480 { 00481 model_manager.setOptionValString(& OPT_SingleChannelSaveRawMaps, "true") ; 00482 model_manager.setOptionValString(& OPT_GistEstimatorType, "ContextBased") ; 00483 model_manager.setOptionValString(& OPT_NumOrientations, "6") ; 00484 00485 model_manager.setOptionValString(& OPT_TrainingVectors, 00486 GECB_DEFAULT_TRAINING_VECTORS_FILE) ; 00487 model_manager.setOptionValString(& OPT_PCAMatrix, 00488 GECB_DEFAULT_PCA_MATRIX_FILE) ; 00489 model_manager.setOptionValString(& OPT_TrainingSet, 00490 GECB_DEFAULT_TRAINING_SET) ; 00491 model_manager.setOptionValString(& OPT_TrainingDB, 00492 GECB_DEFAULT_TRAINING_DATABASE) ; 00493 model_manager.setOptionValString(& OPT_ResultsFile, 00494 GECB_DEFAULT_CLASSIFICATION_RESULTS_FILE) ; 00495 model_manager.setOptionValString(& OPT_ImageName, 00496 GECB_DEFAULT_IMAGE_NAME) ; 00497 model_manager.setOptionValString(& OPT_SegmentNumber, 00498 GECB_DEFAULT_SEGMENT_NUMBER) ; 00499 00500 if (! model_manager.parseCommandLine(argc, argv, GECB_ACTIONS, 1, 1)) 00501 throw std::runtime_error("command line parse error") ; 00502 } 00503 00504 // To run the simulation, we simply dispatch to the function 00505 // corresponding to the action (non-option) command line argument. 00506 void ContextBasedSimulation::run() 00507 { 00508 std::string cmd(model_manager.getExtraArg(0)) ; 00509 ActionMap::iterator action = action_map.find(cmd) ; 00510 if (action == action_map.end()) 00511 throw std::runtime_error(cmd + ": sorry, unknown action") ; 00512 (this->*(action->second))() ; 00513 } 00514 00515 // Quick helper class to start and stop model manager (useful when 00516 // exceptions are thrown because destructor automatically stops the model 00517 // manager without requiring an explicit call to the stop method prior to 00518 // throwing the exception). 
00519 class ModelManagerStarter { 00520 ModelManager& mgr ; 00521 public : 00522 ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;} 00523 ~ModelManagerStarter() {mgr.stop() ;} 00524 } ; 00525 00526 // This method implements the simulation's main loop for the "accumulate" 00527 // action, which evolves the different components of the simulation and 00528 // saves the raw (384-dimensional) gist vectors to the training vectors 00529 // file. 00530 void ContextBasedSimulation::accumulate_training_vectors() 00531 { 00532 ModelManagerStarter M(model_manager) ; 00533 00534 LFATAL("fixme"); 00535 nub::soft_ref<GistEstimatorContextBased> ge;///// = 00536 /////// dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ; 00537 if (ge.isInvalid()) 00538 throw std::runtime_error("can only use GistEstimatorContextBased") ; 00539 00540 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 00541 for(;;) 00542 { 00543 try 00544 { 00545 SeC<SimEventGistOutput> gist_out = 00546 event_queue->check<SimEventGistOutput>(brain.get(), 00547 SEQ_UNMARKED | SEQ_MARK, 00548 ge.get()) ; 00549 if (gist_out) // gist vector waiting to be picked up 00550 append(ge->getGist(), training_vectors_file()) ; 00551 if (event_queue->evolve() != SIM_CONTINUE) 00552 break ; 00553 } 00554 catch (lfatal_exception&) // if we seek beyond end of frame series 00555 { 00556 return ; // prevent LFATAL induced abortion 00557 } 00558 } 00559 } 00560 00561 // The following method implements the "pca" action of this program, 00562 // which produces the PCA transformation matrix that allows us to reduce 00563 // the dimensionality of gist vectors from 384 to 80. 
00564 void ContextBasedSimulation::compute_pca_matrix() 00565 { 00566 LINFO("MVN: counting lines in %s", training_vectors_file().c_str()) ; 00567 const int cols = count_lines(training_vectors_file()) ; 00568 const int rows = static_cast<int>(GistEstimatorContextBased::NUM_FEATURES) ; 00569 00570 LINFO("MVN: reading %d training vectors from %s", 00571 cols, training_vectors_file().c_str()) ; 00572 OpenCVMatrix training_vectors = 00573 load_training_vectors(training_vectors_file(), rows, cols) ; 00574 00575 LINFO("MVN: doing PCA on training vectors to get %d principal components", 00576 GECB_NUM_PRINCIPAL_COMPONENTS) ; 00577 OpenCVMatrix pca_matrix = pca(training_vectors, 00578 GECB_NUM_PRINCIPAL_COMPONENTS) ; 00579 00580 LINFO("MVN: PCA done; saving PCA transformation matrix to %s", 00581 pca_matrix_file().c_str()) ; 00582 save(pca_matrix, pca_matrix_file()) ; 00583 } 00584 00585 // This method implements the "train" action of this program. Like the 00586 // accumulate action, it implements a "main loop" for the simulation, 00587 // evolving different components with each iteration. But rather than 00588 // saving raw gist vectors to a file, it uses the transformation matrix 00589 // computed by the "pca" action to reduce the dimensionality of the raw 00590 // gist vectors from 384 to 80 and then saves these 80-dimensional 00591 // vectors to the training database. 
00592 void ContextBasedSimulation::compute_training_vectors() 00593 { 00594 ModelManagerStarter M(model_manager) ; 00595 00596 LFATAL("fixme"); 00597 nub::soft_ref<GistEstimatorContextBased> ge;///// = 00598 //////// dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ; 00599 if (ge.isInvalid()) 00600 throw std::runtime_error("can only use GistEstimatorContextBased") ; 00601 00602 Image<double> W = load_image(pca_matrix_file(), 00603 GECB_NUM_PRINCIPAL_COMPONENTS, 00604 count_lines(pca_matrix_file())) ; 00605 LINFO("MVN: loaded %dx%d PCA transformation matrix from %s", 00606 W.getHeight(), W.getWidth(), pca_matrix_file().c_str()) ; 00607 00608 int i = 1 ; 00609 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 00610 for(;;) 00611 { 00612 try 00613 { 00614 SeC<SimEventGistOutput> gist_out = 00615 event_queue->check<SimEventGistOutput>(brain.get(), 00616 SEQ_UNMARKED | SEQ_MARK, 00617 ge.get()) ; 00618 if (gist_out) // gist vector waiting to be picked up 00619 append(vmMult(ge->getGist(), W), 00620 training_set(), 00621 image_name() + to_string(i++) + " " + segment_number()) ; 00622 if (event_queue->evolve() != SIM_CONTINUE) 00623 break ; 00624 } 00625 catch (lfatal_exception&) // if we seek beyond end of frame series 00626 { 00627 return ; // prevent LFATAL induced abortion 00628 } 00629 } 00630 } 00631 00632 // Once we have the training set, we compute the segment means by 00633 // looking at the segment number for each image in the training set and 00634 // summing its gist vector to the running total for that segment. At the 00635 // end of this process, we simply divide the totals for each segment by 00636 // the count of gist vectors encountered for that segment. 00637 // 00638 // This averaging operation is facilitated by a map that associates a 00639 // segment number with a gist vector sum and a corresponding count. 
00640 void ContextBasedSimulation::compute_segment_means() 00641 { 00642 typedef std::pair<int, GistVector> SegmentInfo ; // count and sum 00643 typedef std::map<int, SegmentInfo> MeansMap ; 00644 MeansMap means ; 00645 00646 LINFO("reading training set data from %s", training_set().c_str()) ; 00647 std::ifstream ifs(training_set().c_str()) ; 00648 for(;;) 00649 { 00650 try 00651 { 00652 std::istringstream line(getline(ifs)) ; 00653 00654 std::string name ; int segment ; 00655 GistVector G(GECB_NUM_PRINCIPAL_COMPONENTS, 1, ZEROS) ; 00656 line >> name >> segment >> G ; 00657 00658 MeansMap::iterator it = means.find(segment) ; 00659 if (it == means.end()) 00660 { 00661 means.insert(std::make_pair(segment, SegmentInfo(1, G))) ; 00662 } 00663 else 00664 { 00665 SegmentInfo& I = it->second ; 00666 ++I.first ; // count 00667 I.second += G ; // gist vector sum 00668 } 00669 } 00670 catch (std::exception&) // ought to be just EOF for ifs 00671 { 00672 break ; 00673 } 00674 } 00675 00676 LINFO("computing gist vector averages for each segment and saving to %s", 00677 training_database().c_str()) ; 00678 std::ofstream ofs(training_database().c_str()) ; 00679 for (MeansMap::iterator it = means.begin(); it != means.end(); ++it) 00680 { 00681 SegmentInfo& I = it->second ; 00682 I.second /= I.first ; // gist vector totals by num vectors for segment 00683 ofs << it->first << ' ' << I.second ; 00684 } 00685 } 00686 00687 // The following method implements this program's "classify" action. It 00688 // loads the PCA transformation matrix and the training vectors database 00689 // and then uses a simple Euclidean distance measure to compute the 00690 // closest match for the input image. 
00691 void ContextBasedSimulation::classify_input_images() 00692 { 00693 ModelManagerStarter M(model_manager) ; 00694 00695 LFATAL("fixme"); 00696 nub::soft_ref<GistEstimatorContextBased> ge;///////// = 00697 ////// dynCastWeak<GistEstimatorContextBased>(brain->getGE()) ; 00698 if (ge.isInvalid()) 00699 throw std::runtime_error("can only use GistEstimatorContextBased") ; 00700 00701 Image<double> W = load_image(pca_matrix_file(), 00702 GECB_NUM_PRINCIPAL_COMPONENTS, 00703 count_lines(pca_matrix_file())) ; 00704 LINFO("MVN: loaded %dx%d PCA transformation matrix from %s", 00705 W.getHeight(), W.getWidth(), pca_matrix_file().c_str()) ; 00706 00707 TrainingDB training_db = load_training_database(training_database()) ; 00708 LINFO("MVN: loaded %d training vectors from %s", 00709 int(training_db.size()), training_database().c_str()) ; 00710 00711 int i = 1 ; 00712 nub::ref<SimEventQueue> event_queue = configurator->getQ() ; 00713 for(;;) 00714 { 00715 try 00716 { 00717 SeC<SimEventGistOutput> gist_out = 00718 event_queue->check<SimEventGistOutput>(brain.get(), 00719 SEQ_UNMARKED | SEQ_MARK, 00720 ge.get()) ; 00721 if (gist_out) // gist vector waiting to be picked up 00722 classify_image(std::make_pair(image_name() + to_string(i++), 00723 vmMult(ge->getGist(), W)), 00724 training_db, results_file()) ; 00725 if (event_queue->evolve() != SIM_CONTINUE) 00726 break ; 00727 } 00728 catch (lfatal_exception&) // if we seek beyond end of frame series 00729 { 00730 return ; // prevent LFATAL induced abortion 00731 } 00732 } 00733 } 00734 00735 // Do we really not have to delete the configurator, brain and input 00736 // frame series? If it turns out we do, this empty destructor will have 00737 // to be filled out with the necessary delete calls... 
00738 ContextBasedSimulation::~ContextBasedSimulation() {} 00739 00740 } // end of local namespace encapsulating simulation encapsulation section 00741 00742 //------------------------------- MAIN ---------------------------------- 00743 00744 #ifdef HAVE_OPENCV 00745 00746 int main(int argc, const char* argv[]) 00747 { 00748 MYLOGVERB = LOG_INFO ; // suppress debug messages 00749 try 00750 { 00751 ContextBasedSimulation S("train-gecb Model") ; 00752 S.parse_command_line(argc, argv) ; 00753 S.run() ; 00754 } 00755 catch (std::exception& e) 00756 { 00757 LFATAL("%s", e.what()) ; 00758 return 1 ; 00759 } 00760 return 0 ; 00761 } 00762 00763 #else 00764 00765 int main() 00766 { 00767 LINFO("Sorry, this program needs OpenCV.") ; 00768 return 1 ; 00769 } 00770 00771 #endif 00772 00773 //-------------------------------- PCA ---------------------------------- 00774 00775 namespace { 00776 00777 // Create the MxN data matrix for PCA from the training vectors file. 00778 // Each line of this file becomes a column of the data matrix. 00779 CvMat* load_training_vectors(const std::string& file_name, int M, int N) 00780 { 00781 CvMat* data = cvCreateMat(M, N, CV_32FC1) ; 00782 00783 double d ; 00784 std::ifstream ifs(file_name.c_str()) ; 00785 for (int j = 0; j < N; ++j) 00786 for (int i = 0; i < M; ++i) { // column-major reading 00787 if (! ifs) { 00788 cvReleaseMat(& data) ; 00789 throw std::runtime_error(file_name + ": out of data?!?") ; 00790 } 00791 ifs >> d ; 00792 cvmSet(data, i, j, d) ; 00793 } 00794 00795 return data ; 00796 } 00797 00798 // Return the PCA transformation matrix that will allow extraction of the 00799 // D principal components from input data. 
00800 CvMat* pca(const OpenCVMatrix& data, int D) 00801 { 00802 OpenCVMatrix means = cvCreateMat(data.num_rows(), 1, CV_32FC1) ; 00803 OpenCVMatrix eigenvalues = cvCreateMat(D, 1, CV_32FC1) ; 00804 OpenCVMatrix eigenvectors = cvCreateMat(D, data.num_rows(), CV_32FC1) ; 00805 00806 cvCalcPCA(data, means, eigenvalues, eigenvectors, CV_PCA_DATA_AS_COL) ; 00807 00808 CvMat* pca_matrix = cvCreateMat(data.num_rows(), D, CV_32FC1) ; 00809 cvTranspose(eigenvectors, pca_matrix) ; 00810 return pca_matrix ; 00811 } 00812 00813 } // end of local namespace encapsulating training vectors section 00814 00815 //----------------------- IMAGE CLASSIFICATION -------------------------- 00816 00817 // Given the 80-D gist vectors for an input image and each of the 00818 // training segments, we can tell which training segment the input image 00819 // matches most closely by performing a Euclidean distance check between 00820 // the input image's gist vector and the vectors of each of the training 00821 // segments. 00822 namespace { 00823 00824 // When computing the Euclidean distance between the input image's gist 00825 // vector and that of each of the training segments, we want to be able 00826 // to tell which training segment is the closest. For that, we use the 00827 // following pair that "maps" a training segment number to its 00828 // corresponding distance. 00829 typedef std::pair<int, double> SegmentDistance ; 00830 00831 // To sort segment distances, we want to compare the Euclidean distances 00832 // rather than their names. 00833 bool distance_cmp(const SegmentDistance& L, const SegmentDistance& R) 00834 { 00835 return L.second < R.second ; 00836 } 00837 00838 // But when writing classification results, we're only interested in the 00839 // matching training segment's number and not really in the Euclidean 00840 // distance between its gist vector and that of the input image. 
00841 std::ostream& operator<<(std::ostream& os, const SegmentDistance& D) 00842 { 00843 return os << D.first ; 00844 } 00845 00846 // Given an entry from the training database, the following function 00847 // object returns the Euclidean distance between the supplied input 00848 // image's gist vector and the training segment's gist vector. 00849 struct euclidean_distance 00850 : public std::binary_function<TrainingDBEntry, GistVector, SegmentDistance> 00851 { 00852 SegmentDistance 00853 operator()(const TrainingDBEntry& E, const GistVector& input) const { 00854 return std::make_pair(E.first, distance<float>(input, E.second)) ; 00855 } 00856 } ; 00857 00858 // This function computes the Euclidean distance between the input 00859 // image's 80-D gist vector and each of the 80-D gist vectors for the 00860 // training segments and then writes the top five matches to the 00861 // specified results file. 00862 // 00863 // DEVNOTE: To output the top five matches to the results file, we ought 00864 // to be able to use the std::copy algorithm in conjunction with 00865 // std::ostream_iterator<SegmentDistance>. Unfortunately, ostream 00866 // iterators cannot be used with user-defined types. This is entirely in 00867 // keeping with C++'s philosophy of sucking ass most of the time but 00868 // sucking ass big-time only every now and then. 00869 void classify_image(const InputImageData& input, const TrainingDB& db, 00870 const std::string& results_file) 00871 { 00872 std::vector<SegmentDistance> distances ; 00873 std::transform(db.begin(), db.end(), std::back_inserter(distances), 00874 std::bind2nd(euclidean_distance(), input.second)) ; 00875 std::sort(distances.begin(), distances.end(), distance_cmp) ; 00876 00877 std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ; 00878 ofs << input.first << ' ' ; 00879 //std::copy(distances.begin(), distances.begin() + 5, 00880 //std::ostream_iterator<SegmentDistance>(ofs, " ")) ; // ERROR! 
00881 for (unsigned int i = 0; i < distances.size() && i < 5; ++i) 00882 ofs << distances[i] << ' ' ; 00883 ofs << '\n' ; 00884 } 00885 00886 } // end of local namespace encapsulating image classification section 00887 00888 //-------------------------------- I/O ---------------------------------- 00889 00890 namespace { 00891 00892 // Save an OpenCV matrix to the specified file 00893 void save(const OpenCVMatrix& M, const std::string& file_name) 00894 { 00895 std::ofstream ofs(file_name.c_str()) ; 00896 for (int i = 0; i < M.num_rows(); ++i) { 00897 for (int j = 0; j < M.num_cols(); ++j) 00898 ofs << M.get<float>(i, j) << ' ' ; 00899 ofs << '\n' ; 00900 } 00901 } 00902 00903 // Append an Image to the specified file, optionally writing an image 00904 // name to the first row. 00905 void append(const Image<double>& I, const std::string& file_name, 00906 const std::string& image_name) 00907 { 00908 if (! I.initialized()) 00909 throw std::runtime_error("save empty image to " + file_name + "?!?") ; 00910 if (file_name.empty()) 00911 throw std::runtime_error("must specify file name for saving Image") ; 00912 00913 std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ; 00914 if (! 
image_name.empty()) 00915 ofs << image_name << ' ' ; 00916 ofs << I ; 00917 } 00918 00919 // Load Image stored in a file 00920 Image<double> 00921 load_image(const std::string& file_name, int width, int height) 00922 { 00923 try 00924 { 00925 Image<double> I(width, height, NO_INIT) ; 00926 std::ifstream ifs(file_name.c_str()) ; 00927 ifs >> I ; 00928 return I ; 00929 } 00930 catch (std::exception&) 00931 { 00932 throw std::runtime_error(file_name + ": out of data?!?") ; 00933 } 00934 } 00935 00936 // Stream insertion operator for an Image<double> 00937 std::ostream& operator<<(std::ostream& os, const Image<double>& I) 00938 { 00939 for (int y = 0; y < I.getHeight(); ++y) { 00940 for (int x = 0; x < I.getWidth(); ++x) 00941 os << I.getVal(x, y) << ' ' ; 00942 os << '\n' ; 00943 } 00944 return os ; 00945 } 00946 00947 // The following extraction operator reads in an Image<double> from the 00948 // supplied input stream. 00949 // 00950 // WARNING: It *assumes* that the image has already allocated some memory 00951 // for itself and that the input stream has those many elements. 00952 std::istream& operator>>(std::istream& is, Image<double>& I) 00953 { 00954 double d ; 00955 for (int y = 0; y < I.getHeight(); ++y) 00956 for (int x = 0; x < I.getWidth(); ++x) 00957 if (is >> d) 00958 I.setVal(x, y, d) ; 00959 else 00960 throw std::runtime_error("not enough data for Image<double>?!?") ; 00961 return is ; 00962 } 00963 00964 // The following function reads the training "database," which is a plain 00965 // text file containing one entry per line. Each line starts with the 00966 // segment number and then come the eighty numbers making up that 00967 // segment's mean gist vector. 
00968 TrainingDB load_training_database(const std::string& file_name) 00969 { 00970 TrainingDB db ; 00971 00972 std::ifstream ifs(file_name.c_str()) ; 00973 for(;;) 00974 { 00975 try 00976 { 00977 std::istringstream line(getline(ifs)) ; 00978 00979 int segment ; 00980 GistVector G(GECB_NUM_PRINCIPAL_COMPONENTS, 1, ZEROS) ; 00981 line >> segment >> G ; 00982 00983 db.insert(std::make_pair(segment, G)) ; 00984 } 00985 catch (std::exception&) // ought only to happen on ifs EOF 00986 { 00987 break ; 00988 } 00989 } 00990 00991 return db ; 00992 } 00993 00994 // Read a line from the input stream and return it as a string 00995 std::string getline(std::istream& is) 00996 { 00997 std::string line ; 00998 getline(is, line) ; 00999 if (! is || line.empty()) // EOF 01000 throw std::runtime_error("unable to read from input stream") ; 01001 return line ; 01002 } 01003 01004 } // end of local namespace encapsulating texton accumulation section 01005 01006 //-------------------------- UTILITY ROUTINES --------------------------- 01007 01008 namespace { 01009 01010 // Convenient (but perhaps not the most efficient) helper to convert 01011 // various data types to strings. 01012 // 01013 // DEVNOTE: Works as long as type T defines an operator << that writes to 01014 // an ostream. 01015 template<typename T> 01016 std::string to_string(const T& t) 01017 { 01018 std::ostringstream str ; 01019 str << t ; 01020 return str.str() ; 01021 } 01022 01023 // Count the number of lines in a file (wc -l) 01024 int count_lines(const std::string& file_name) 01025 { 01026 int n = -1 ; // because EOF is read after final \n (1 extra iter. 
of loop) 01027 std::ifstream ifs(file_name.c_str()) ; 01028 01029 std::string dummy ; 01030 while (ifs) { 01031 getline(ifs, dummy) ; 01032 ++n ; 01033 } 01034 return n ; 01035 } 01036 01037 } // end of local namespace encapsulating utility routines section 01038 01039 //----------------------------------------------------------------------- 01040 01041 /* So things look consistent in everyone's emacs... */ 01042 /* Local Variables: */ 01043 /* indent-tabs-mode: nil */ 01044 /* End: */