train-texton.C

Go to the documentation of this file.
00001 /*!
00002    @file Gist/train-texton.C create the prototypical universal textons
00003          "database" from the training data
00004 */
00005 
00006 // //////////////////////////////////////////////////////////////////// //
00007 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00008 // University of Southern California (USC) and the iLab at USC.         //
00009 // See http://iLab.usc.edu for information about this project.          //
00010 // //////////////////////////////////////////////////////////////////// //
00011 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00012 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00013 // in Visual Environments, and Applications'' by Christof Koch and      //
00014 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00015 // pending; application number 09/912,225 filed July 23, 2001; see      //
00016 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00017 // //////////////////////////////////////////////////////////////////// //
00018 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00019 //                                                                      //
00020 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00021 // redistribute it and/or modify it under the terms of the GNU General  //
00022 // Public License as published by the Free Software Foundation; either  //
00023 // version 2 of the License, or (at your option) any later version.     //
00024 //                                                                      //
00025 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00026 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00027 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00028 // PURPOSE.  See the GNU General Public License for more details.       //
00029 //                                                                      //
00030 // You should have received a copy of the GNU General Public License    //
00031 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00032 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00033 // Boston, MA 02111-1307 USA.                                           //
00034 // //////////////////////////////////////////////////////////////////// //
00035 //
00036 // Primary maintainer for this file: mviswana usc edu
00037 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-texton.C $
00038 // $Id: train-texton.C 14605 2011-03-15 02:25:06Z dparks $
00039 //
00040 
00041 //------------------------------ HEADERS --------------------------------
00042 
00043 #include "Image/OpenCVUtil.H"  // must be first to avoid conflicting defs of int64, uint64
00044 
00045 // Gist specific headers
00046 #include "Neuro/GistEstimatorTexton.H"
00047 
00048 // Other INVT headers
00049 #include "Neuro/StdBrain.H"
00050 #include "Neuro/NeuroOpts.H"
00051 #include "Neuro/NeuroSimEvents.H"
00052 
00053 #include "Media/SimFrameSeries.H"
00054 #include "Media/MediaOpts.H"
00055 
00056 #include "Simulation/SimEventQueue.H"
00057 #include "Simulation/SimEventQueueConfigurator.H"
00058 
00059 #include "Channels/ChannelOpts.H"
00060 #include "Component/ModelManager.H"
00061 #include "Component/ModelOptionDef.H"
00062 
00063 #include "Image/Point2D.H"
00064 
00065 #include "nub/ref.h"
00066 
#ifndef HAVE_OPENCV // fake OpenCV API so as to not break builds
namespace {

// Do-nothing stand-ins for the handful of OpenCV types, functions and
// macros this file uses. With these, a build without OpenCV still
// compiles; the K-means related commands will simply produce no useful
// output at runtime.
struct CvMat {int rows, cols, type ;} ;

inline CvMat* cvCreateMat(int, int, int) {return 0 ;}
inline void   cvZero(CvMat *) {}
inline void   cvReleaseMat(CvMat**) {}
inline double cvmGet(CvMat*, int, int) {return 0 ;}
inline void   cvmSet(CvMat*, int, int, double) {}
inline int    cvTermCriteria(int, int, double) {return 0 ;}
inline void   cvKMeans2(CvMat*, int, CvMat*, int) {}

// Both element-type tags collapse to 0 here; that is fine because the
// stub cvCreateMat ignores its type argument anyway.
#define CV_32FC1 0
#define CV_32SC1 0
inline int CV_MAT_TYPE(int) {return 0 ;}
#define CV_MAT_ELEM(matrix, type, row, col) (type(0))

#define CV_TERMCRIT_EPS  0
#define CV_TERMCRIT_ITER 0

}

#endif // OpenCV availability check
00091 
00092 // Standard C++ headers
00093 #include <fstream>
00094 #include <sstream>
00095 #include <ios>
00096 #include <numeric>
00097 #include <algorithm>
00098 #include <functional>
00099 #include <map>
00100 #include <vector>
00101 #include <iterator>
00102 #include <stdexcept>
00103 #include <utility>
00104 #include <limits>
00105 #include <cmath>
00106 
00107 //-------------------------- UTILITY ROUTINES ---------------------------
00108 
00109 namespace {
00110 
00111 // Convenient (but perhaps not the most efficient) helper to convert
00112 // various data types to strings.
00113 //
00114 // DEVNOTE: Works as long as type T defines an operator << that writes to
00115 // an ostream.
// Convert an arbitrary value to its textual form by streaming it into a
// string buffer. Any type T with an ostream inserter works.
template<typename T>
std::string to_string(const T& value)
{
   std::ostringstream stream ;
   stream << value ;
   return stream.str() ;
}
00123 
00124 // Count the number of lines in a file (wc -l)
// Count the number of lines in a file (wc -l).
//
// DEVNOTE: The old implementation looped on the stream state and
// started the counter at -1 to compensate for the extra iteration at
// EOF. That undercounted by one whenever the file's final line lacked a
// trailing newline (the last successful getline sets eofbit, ending the
// loop before the compensating increment). Looping on getline itself
// counts every line, terminated or not.
//
// Returns -1 if the file cannot be opened (preserving the old error
// sentinel) and 0 for an empty file.
int count_lines(const std::string& file_name)
{
   std::ifstream ifs(file_name.c_str()) ;
   if (! ifs)
      return -1 ;

   int n = 0 ;
   std::string dummy ;
   while (std::getline(ifs, dummy))
      ++n ;
   return n ;
}
00137 
00138 // Returns true if a floating point number is near zero
// Returns true when a floating point value is indistinguishable from
// zero, i.e., its magnitude does not exceed the machine epsilon.
bool is_zero(double d)
{
   const double eps = std::numeric_limits<double>::epsilon() ;
   const double mag = std::fabs(d) ;
   return mag <= eps ;
}
00143 
00144 } // end of local namespace encapsulating utility routines section
00145 
00146 //------------------------ TEXTON ACCUMULATION --------------------------
00147 
00148 // Given an input image, GistEstimatorTexton works by performing
00149 // K-nearest neighbour search on the textons in the input image. Each of
00150 // the input textons is matched against the database of "universal"
00151 // textons and the frequency of the occurrence of universal textons in
00152 // the input image is used as the basis of image classification.
00153 //
00154 // The universal textons database is just a collection of the 100 most
00155 // frequently occuring textons in the training set. It is stored as a
00156 // 100x36 matrix (the number 36 comes from 6x3x2, where 6 is the number
00157 // of orientations input images are filtered at, 3 is the number of
00158 // scales for each orientation and 2 is due to the even and odd filters
00159 // applied at each scale and orientation; refer to the Renninger-Malik
00160 // paper for further details).
00161 //
00162 // This program has an operational mode that spits out a plain text file
00163 // containing the universal textons for a given set of input images by
00164 // first accumulating all the textons and then performing K-means
00165 // clustering on them. To get at the training set's textons, we need to
00166 // "hook" into the GistEstimatorTexton's processing pipeline.
00167 //
00168 // These textons are then stored in a plain text file that is then loaded
00169 // as part of the data for the K-means clustering procedure.
00170 //
00171 // This section of code takes care of accumulating the textons for the
00172 // training set in the above-mentioned plain text file. The code for
00173 // performing K-means analysis on the accumulated textons is in the next
00174 // section.
00175 namespace {
00176 
00177 // A texton is simply the vector of filter responses for a given pixel.
00178 // That is, if we apply 36 filters to an input image, we will get 36
00179 // Images as the filteration results. The texton for pixel (i,j) will be
00180 // the vector of 36 numbers formed by taking pixel (i,j) from each of the
00181 // 36 Images in the filteration results.
00182 //
00183 // Rather than implement some custom object to represent a texton and a
00184 // collection of textons, we simply piggyback off INVT's Image<T> class,
00185 // which is used to store the textons for an entire image.
typedef GistEstimatorTexton::ImageType Texton ; // Image holding an entire input image's textons
00187 
00188 // Quick wrapper around an output file used to store the training
00189 // textons. This file is populated during the texton accumulation phase
00190 // and then loaded as the data matrix for the K-means computation.
// Static-only helper: collects training textons into the plain text
// file named by out_file. All state is static so the free-function
// callback registered with GistEstimatorTexton can reach it.
class textons_accumulator {
   static std::string out_file ; // target file name; empty until output_file() is called

   textons_accumulator() ; // private to disallow instantiation
   ~textons_accumulator() ;
public :
   static void output_file(const std::string& file_name) ; // set the accumulation target
   static void write(const Texton&) ; // append one texton Image, row by row
} ;

// Static data member for storing the training textons file name
// persistently across multiple invocations of the GistEstimatorTexton's
// training hook.
std::string textons_accumulator::out_file ;
00205 
00206 // The GistEstimatorTexton client must set the above variable
00207 // appropriately prior to setting up the GistEstimatorTexton training
00208 // hook.
// Record the file that write() should append to. Must be called before
// the GistEstimatorTexton training hook fires for the first time.
void textons_accumulator::output_file(const std::string& file_name)
{
   out_file = file_name ; // retained statically for later write() calls
}
00213 
00214 // The following function is meant to be used by the GistEstimatorTexton
00215 // training hook. It simply appends the texton Image passed to it to the
00216 // output file row by row.
00217 //
00218 // DEVNOTE: We could open the output file once (i.e., in the previous
00219 // function) and use that object to avoid reopening (by using a static
00220 // ostream data member rather than a static string). However, if the
00221 // program were to somehow crash halfway through, then the training
00222 // textons output file would be in an inconsistent state and rerunning
00223 // the program can result in appending data to a possibly inconsistent
00224 // dataset, which would only make things worse.
00225 //
00226 // Thus, we choose to open and close the output file each time the
00227 // GistEstimatorTexton training hook is triggered. (Of course, if the
00228 // program cashes while this function is executing, then all bets are
00229 // off; the training textons file's inconsistency will be unavoidable in
00230 // this case.)
00231 void textons_accumulator::write(const Texton& textons)
00232 {
00233    if (out_file.empty())
00234       throw std::runtime_error("textons accumulator output file "
00235                                "not specified") ;
00236 
00237    std::ofstream ofs(out_file.c_str(), std::ios::out | std::ios::app) ;
00238    for (int y = 0; y < textons.getHeight(); ++y) {
00239       for (int x = 0; x < textons.getWidth(); ++x)
00240          ofs << textons.getVal(x, y) << ' ' ;
00241       ofs << '\n' ;
00242    }
00243 }
00244 
00245 // The following function is the callback for the GistEstimatorTexton's
00246 // training hook. The gist estimator object will pass this function an
00247 // Image that serves as the current input image's textons. This
00248 // function simply shoves this texton Image into the accumulator defined
00249 // above.
// Free-function trampoline: GistEstimatorTexton's training hook cannot
// call a class member directly, so this forwards each texton Image to
// the accumulator above.
void accumulate_textons(const Texton& textons)
{
   textons_accumulator::write(textons) ;
}
00254 
00255 } // end of local namespace encapsulating texton accumulation section
00256 
00257 //------------------- UNIVERSAL TEXTONS COMPUTATION ---------------------
00258 
00259 // Once the textons have been accumulated from the filteration results
00260 // of each of the input images, we compute the universal textons using
00261 // the K-means implementation available in OpenCV.
00262 //
00263 // DEVNOTE: Renninger and Malik used the K-means implementation in the
00264 // Netlab (Matlab) toolbox. Unfortunately, Matlab was unable to handle
00265 // the volume of data being passed to it for the gist models comparison
00266 // project at iLab (for which this implementation was developed).
00267 namespace {
00268 
00269 // Crude encapsulation of OpenCV matrices
00270 class OpenCVMatrix {
00271    CvMat* matrix ;
00272 public :
00273    OpenCVMatrix(int num_rows, int num_cols, int type) ;
00274    OpenCVMatrix(CvMat*) ;
00275    ~OpenCVMatrix() ;
00276 
00277    int num_rows() const {return matrix->rows ;}
00278    int num_cols() const {return matrix->cols ;}
00279    int type()     const {return CV_MAT_TYPE(matrix->type) ;}
00280 
00281    template<typename T> // T must match matrix->type (float for CV_32FC1, etc.)
00282    T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;}
00283 
00284    operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea)
00285 } ;
00286 
00287 OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type)
00288    : matrix(cvCreateMat(num_rows, num_cols, type))
00289 {
00290    if (! matrix)
00291       throw std::runtime_error("unable to create OpenCV matrix") ;
00292 }
00293 
00294 OpenCVMatrix::OpenCVMatrix(CvMat* M)
00295    : matrix(M)
00296 {
00297    if (! matrix)
00298       throw std::runtime_error("cannot create empty/null matrix") ;
00299 }
00300 
00301 OpenCVMatrix::~OpenCVMatrix()
00302 {
00303    cvReleaseMat(& matrix) ;
00304 }
00305 
00306 // The following function reads the training textons into an OpenCV
00307 // matrix. It must know how many lines the training textons file has.
00308 CvMat* load_training_textons(const std::string& file_name, int num_lines)
00309 {
00310    CvMat* M =
00311       cvCreateMat(num_lines, GistEstimatorTexton::NUM_FILTERS, CV_32FC1) ;
00312 
00313    double d ;
00314    std::ifstream ifs(file_name.c_str()) ;
00315    for (int i = 0; i < num_lines; ++i)
00316       for (int j = 0; j < int(GistEstimatorTexton::NUM_FILTERS); ++j) {
00317          if (! ifs) {
00318             cvReleaseMat(& M) ;
00319             throw std::runtime_error(file_name + ": out of data?!?") ;
00320          }
00321          ifs >> d ;
00322          cvmSet(M, i, j, d) ;
00323       }
00324 
00325    return M ;
00326 }
00327 
00328 // OpenCV's K-means implementation returns cluster assignments. But we
00329 // need the cluster centroids. This function takes the data matrix and
00330 // cluster assignments and returns the K centroids.
00331 CvMat* compute_centroids(int K, const OpenCVMatrix& data,
00332                          const OpenCVMatrix& cluster_assignments)
00333 {
00334    CvMat* centroids = cvCreateMat(K, data.num_cols(), data.type()) ;
00335    cvZero(centroids) ;
00336 
00337    std::vector<int> cluster_counts(K) ;
00338    std::fill(cluster_counts.begin(), cluster_counts.end(), 0) ;
00339 
00340    for (int i = 0; i < data.num_rows(); ++i)
00341    {
00342       int C = cluster_assignments.get<int>(i, 0) ;
00343       ++cluster_counts[C] ;
00344 
00345       // Compute sum of C-th centroid and i-th row
00346       for (int j = 0; j < data.num_cols(); ++j)
00347          cvmSet(centroids, C, j,
00348                 cvmGet(centroids, C, j) + data.get<float>(i, j)) ;
00349    }
00350 
00351    // Compute the K centroids by averaging the totals accumulated in the
00352    // centroids matrix using the cluster counts.
00353    for (int C = 0; C < K; ++C)
00354       for (int j = 0; j < data.num_cols(); ++j)
00355          cvmSet(centroids, C, j,
00356                 cvmGet(centroids, C, j) / cluster_counts[C]) ;
00357 
00358    return centroids ;
00359 }
00360 
00361 // K-means parameters
00362 #ifndef TT_KMEANS_ITERATIONS
00363    #define TT_KMEANS_ITERATIONS (100)
00364 #endif
00365 #ifndef TT_KMEANS_PRECISION
00366    #define TT_KMEANS_PRECISION (.01)
00367 #endif
00368 
00369 // This function performs K-means clustering on the supplied data matrix
00370 // and returns the cluster centers.
CvMat* kmeans(int K, const OpenCVMatrix& data)
{
   // One cluster label per data point; filled in by cvKMeans2.
   OpenCVMatrix cluster_assignments(data.num_rows(), 1, CV_32SC1) ;

   LINFO("MVN: computing K-means cluster assignments with OpenCV") ;
   cvKMeans2(data, K, cluster_assignments,
             cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER,
                            TT_KMEANS_ITERATIONS, TT_KMEANS_PRECISION)) ;

   // cvKMeans2 yields labels, not centers; convert to the K centroids,
   // which are what the universal textons database actually stores.
   LINFO("MVN: cluster assignments done; computing centroids...") ;
   return compute_centroids(K, data, cluster_assignments) ;
}
00383 
00384 // Write the universal textons, row by row, to a plain text file.
00385 void save_universal_textons(const OpenCVMatrix& universal_textons,
00386                             const std::string& file_name)
00387 {
00388    std::ofstream ofs(file_name.c_str()) ;
00389    for (int i = 0; i < universal_textons.num_rows(); ++i) {
00390       for (int j = 0; j < universal_textons.num_cols(); ++j)
00391          ofs << universal_textons.get<float>(i, j) << ' ' ;
00392       ofs << '\n' ;
00393    }
00394 }
00395 
00396 // Read the universal textons from a plain text file into an Image<T>
00397 Texton load_universal_textons(const std::string& file_name)
00398 {
00399    const int M = count_lines(file_name) ;
00400    const int N = GistEstimatorTexton::NUM_FILTERS ;
00401    Texton U(N, M, ZEROS) ;
00402 
00403    float f ;
00404    std::ifstream ifs(file_name.c_str()) ;
00405    for (int j = 0; j < M; ++j)
00406       for (int i = 0; i < N; ++i) {
00407          if (! ifs)
00408             throw std::runtime_error(file_name + ": out of data?!?") ;
00409          ifs >> f ;
00410          U.setVal(i, j, f) ;
00411       }
00412 
00413    return U ;
00414 }
00415 
00416 // The training textons are agglomerated into the following number of
00417 // clusters.
00418 //
00419 // DEVNOTE: Although not used in this section, it makes most sense to
00420 // define this symbol here. It does not fit well into the other sections
00421 // of this file.
00422 #ifndef TT_NUM_UNIVERSAL_TEXTONS
00423    #define TT_NUM_UNIVERSAL_TEXTONS 100
00424 #endif
00425 
00426 } // end of local namespace encapsulating universal textons section
00427 
00428 //------------------- TRAINING HISTOGRAM PROCESSING ---------------------
00429 
00430 // Training is a two step process: first, we use K-means to cluster the
00431 // training set's textons to create the universal textons. Then, we
00432 // collect the histograms counting the universal textons in the training
00433 // images. The universal textons and training set's histogram "database"
00434 // are both used for image classification.
00435 namespace {
00436 
00437 // Some useful types for dealing with texton histograms
typedef Image<double> Histogram ;                       // row vector of universal-texton counts (width TT_NUM_UNIVERSAL_TEXTONS, height 1)
typedef std::map<std::string, Histogram> HistogramMap ; // histogram name --> histogram
typedef HistogramMap::value_type HistogramMapEntry ;    // a single (name, histogram) pair
00441 
00442 // This function appends a training image's histogram to the training
00443 // histograms database file under the supplied "entry" name. As we did in
00444 // the textons accumulation function, in order to minimize possible
00445 // inconsistencies in this database, we choose to open and close the
00446 // training histograms file with each invocation of this helper rather
00447 // than keep a persistent ostream object around that obviates the need
00448 // for repeated file open/close operations.
00449 void save_histogram(const Histogram& histogram,
00450                     const std::string& hist_name,
00451                     const std::string& file_name)
00452 {
00453    LINFO("MVN: saving histogram %s to %s",
00454          hist_name.c_str(), file_name.c_str()) ;
00455    std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ;
00456    ofs << hist_name << ' ' ;
00457    for (int y = 0; y < histogram.getHeight(); ++y) // should be just one row
00458       for (int x = 0; x < histogram.getWidth(); ++x) // should be 100 columns
00459          ofs << histogram.getVal(x, y) << ' ' ;
00460    ofs << '\n' ;
00461 }
00462 
00463 // The following function reads the training histograms "database," which
00464 // is a plain text file containing one histogram per line. Each line
00465 // starts with the name of the training histogram and then come the
00466 // hundred numbers making up that histogram.
00467 HistogramMap load_training_histograms(const std::string& file_name)
00468 {
00469    HistogramMap histograms ;
00470 
00471    std::ifstream ifs(file_name.c_str()) ;
00472    for(;;)
00473    {
00474       std::string str ;
00475       std::getline(ifs, str) ;
00476       if (! ifs || str.empty())
00477          break ;
00478       std::istringstream line(str) ;
00479 
00480       std::string histogram_name ;
00481       line >> histogram_name ;
00482 
00483       Histogram H(TT_NUM_UNIVERSAL_TEXTONS, 1, ZEROS) ;
00484       double d ; int i = 0 ;
00485       while (line >> d)
00486          H.setVal(i++, 0, d) ;
00487 
00488       histograms.insert(std::make_pair(histogram_name, H)) ;
00489    }
00490 
00491    return histograms ;
00492 }
00493 
00494 } // end of local namespace encapsulating training histograms section
00495 
00496 //----------------------- IMAGE CLASSIFICATION --------------------------
00497 
00498 // Given the histograms for an input image and each of the training
00499 // images, we can tell which training image the input image matches most
00500 // closely by performing a chi-squared distance check between the input
00501 // image's histogram and the histograms of each of the training images.
00502 namespace {
00503 
00504 // When computing the chi-square distance between the input image's
00505 // histogram and that of each of the training images, we want to be able
00506 // to tell which training image is the closest. For that, we use the
00507 // following pair that "maps" a training histogram name to its
00508 // corresponding distance.
typedef std::pair<std::string, double> HistogramDistance ;

// Ordering predicate for histogram distances: compare by the chi-square
// measure (second field), never by the histogram's name.
bool chi_square_cmp(const HistogramDistance& L, const HistogramDistance& R)
{
   return R.second > L.second ;
}
00517 
00518 // But when writing classification results, we're only interested in the
00519 // matching training image's name and not really in the chi-square
00520 // distance between its histogram and that of the input image.
std::ostream& operator<<(std::ostream& os, const HistogramDistance& D)
{
   return os << D.first ; // only the matching image's name; the distance is dropped
}
00525 
00526 // Given an entry from the training histograms map, the following
00527 // function object returns the chi-square distance between the input
00528 // image's histogram and the training image's histogram.
// Function object: given an entry from the training histograms map,
// return the chi-square distance between the input image's histogram
// and that entry's histogram, keyed by the entry's name.
class chi_square {
   const Histogram& input ; // input image's histogram (not owned; must outlive this object)
   double distance(const Histogram&, const Histogram&) const ;
public :
   chi_square(const Histogram& H) ;
   // Map a (name, histogram) database entry to a (name, distance) pair.
   HistogramDistance operator()(const HistogramMapEntry& E) const {
      return std::make_pair(E.first, distance(input, E.second)) ;
   }
} ;

chi_square::chi_square(const Histogram& H)
   : input(H)
{}
00542 
00543 double chi_square::distance(const Histogram& L, const Histogram& R) const
00544 {
00545    const int n = L.getWidth() ; // both should have same dimensions (100x1)
00546    double sum = 0 ;
00547    for (int i = 0; i < n; ++i)
00548    {
00549       double l = L.getVal(i, 0) ;
00550       double r = R.getVal(i, 0) ;
00551       double l_minus_r = l - r ;
00552       double l_plus_r  = l + r ;
00553       if (is_zero(l_minus_r) || is_zero(l_plus_r))
00554          continue ;
00555       sum += (l_minus_r * l_minus_r)/l_plus_r ;
00556    }
00557    return sum/2 ;
00558 }
00559 
00560 // This function computes the chi-square distance between the input
00561 // image's histogram and the histograms of the training images and then
00562 // writes the top five matches to the specified results file.
00563 //
00564 // DEVNOTE: To output the top five matches to the results file, we ought
00565 // to be able to use the std::copy algorithm in conjunction with
00566 // std::ostream_iterator<HistogramDistance>. Unfortunately, ostream
00567 // iterators cannot be used with user-defined types. This is entirely in
00568 // keeping with C++'s philosophy of sucking ass most of the time but
00569 // sucking ass big-time only every now and then.
void classify_image(const HistogramMapEntry& input,
                    const HistogramMap& training_histograms,
                    const std::string& results_file)
{
   // Chi-square distance from the input histogram to every training
   // histogram, sorted nearest first.
   std::vector<HistogramDistance> chi_square_distances ;
   std::transform(training_histograms.begin(), training_histograms.end(),
                  std::back_inserter(chi_square_distances),
                  chi_square(input.second)) ;
   std::sort(chi_square_distances.begin(), chi_square_distances.end(),
             chi_square_cmp) ;

   // Append one line to the results file: the input entry's name
   // followed by (up to) the five closest training image names.
   std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ;
   ofs << input.first << ' ' ;
   //std::copy(chi_square_distances.begin(), chi_square_distances.begin() + 5,
             //std::ostream_iterator<HistogramDistance>(ofs, " ")) ; // ERROR!
   for (unsigned int i = 0; i < chi_square_distances.size() && i < 5; ++i)
      ofs << chi_square_distances[i] << ' ' ;
   ofs << '\n' ;
}
00589 
00590 } // end of local namespace encapsulating image classification section
00591 
00592 //----------------------- COMMAND LINE OPTIONS --------------------------
00593 
00594 // This program has four distinct phases/modes of operation, each one
00595 // specified via a suitable non-option command line argument.
00596 // Additionally, it supports several command line options to allow users
00597 // to tweak various parameters such as the name of the universal textons
00598 // file, the training histograms database, and so on.
00599 namespace {
00600 
// Option category under which all the texton-specific options defined
// below are registered.
const ModelOptionCateg MOC_TEXTONS = {
   MOC_SORTPRI_3,
   "Options specific to the Renninger-Malik textons program",
} ;
00605 
00606 // In the training textons accumulation phase, we collect all the textons
00607 // of the input images into a plain text file.
00608 #ifndef TT_DEFAULT_TRAINING_TEXTONS_FILE
00609    #define TT_DEFAULT_TRAINING_TEXTONS_FILE "training_textons.txt"
00610 #endif
00611 
const ModelOptionDef OPT_TrainingTextons = {
   MODOPT_ARG_STRING, "TrainingTextons", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the file where training textons\n"
   "should be accumulated or read from. This is a plain text file containing\n"
   "the training textons matrix that will be fed into the K-means procedure\n"
   "during the texton training phase. Each line of this file will contain a\n"
   "row of training textons.\n",
   "training-textons", '\0', "training-textons-file",
   TT_DEFAULT_TRAINING_TEXTONS_FILE, // default: "training_textons.txt"
} ;
00622 
00623 // In the texton training phase, we use the accumulated training textons
00624 // and perform K-means on them to produce the universal textons.
00625 #ifndef TT_DEFAULT_UNIVERSAL_TEXTONS_FILE
00626    #define TT_DEFAULT_UNIVERSAL_TEXTONS_FILE "universal_textons.txt"
00627 #endif
00628 
const ModelOptionDef OPT_UniversalTextons = {
   MODOPT_ARG_STRING, "UniversalTextons", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the file in which the universal\n"
   "textons are (or are to be) stored. This is a plain text file containing\n"
   "the universal_textons matrix that is used for image classification.\n",
   "universal-textons", '\0', "universal-textons-file",
   TT_DEFAULT_UNIVERSAL_TEXTONS_FILE, // default: "universal_textons.txt"
} ;
00637 
00638 // In the second phase of texton training, we count the universal textons
00639 // in the training images and store them in a training histograms
00640 // "database" under the specified "entry name."
00641 //
00642 // DEVNOTE: The default value for this option (i.e., --histogram-name) is
00643 // not very useful. This particular option really ought to be specified
00644 // on the command line.
00645 #ifndef TT_DEFAULT_TRAINING_HISTOGRAM_NAME
00646    #define TT_DEFAULT_TRAINING_HISTOGRAM_NAME "training_image"
00647 #endif
00648 
const ModelOptionDef OPT_HistogramName = {
   MODOPT_ARG_STRING, "HistogramName", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the \"root\" name of the histogram entry in\n"
   "the training histograms database. The histogram number will be\n"
   "appended to this \"root\" name. The training histograms database\n"
   "is a plain text file containing one histogram entry per line. The\n"
   "first field specifies the name plus number of the entry (e.g.,\n"
   "foo_1, foo_2, bar_1, and so on). The remaining fields are simply the\n"
   "hundred numbers making up the image's universal textons histogram.\n\n"
   "In classification mode, this option specifies the name of the input\n"
   "image's histogram that is written to the results file.\n",
   "histogram-name", '\0', "histogram-name-root",
   TT_DEFAULT_TRAINING_HISTOGRAM_NAME, // default: "training_image" (usually overridden)
} ;
00663 
00664 #ifndef TT_DEFAULT_TRAINING_HISTOGRAMS_FILE
00665    #define TT_DEFAULT_TRAINING_HISTOGRAMS_FILE "training_histograms.txt"
00666 #endif
00667 
const ModelOptionDef OPT_HistogramFile = {
   MODOPT_ARG_STRING, "HistogramFile", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the training histograms database,\n"
   "a plain text file containing one histogram entry per line. The\n"
   "first field specifies the name plus number of the entry (e.g.,\n"
   "foo_1, foo_2, bar_1, and so on). The remaining fields are simply the\n"
   "hundred numbers making up the image's universal textons histogram.\n",
   "histogram-file", '\0', "training-histograms-file",
   TT_DEFAULT_TRAINING_HISTOGRAMS_FILE, // default: "training_histograms.txt"
} ;
00678 
00679 // In image classification mode, we write the results to the following
00680 // file.
00681 #ifndef TT_DEFAULT_CLASSIFICATION_RESULTS_FILE
00682    #define TT_DEFAULT_CLASSIFICATION_RESULTS_FILE "texton_classifications.txt"
00683 #endif
00684 
const ModelOptionDef OPT_ResultsFile = {
   MODOPT_ARG_STRING, "ResultsFile", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the classification results file,\n"
   "a plain text file containing one result entry per line. The first\n"
   "field specifies the name of the input image plus number of the entry,\n"
   "(e.g., foo_1, foo_2, bar_1, and so on). Then come the names of the\n"
   "top five matching images from the training set.\n",
   "results-file", '\0', "classification-results-file",
   TT_DEFAULT_CLASSIFICATION_RESULTS_FILE, // default: "texton_classifications.txt"
} ;
00695 
// The different operational modes of this program must be specified as
// the one and only non-option command line argument. This "action"
// command must be one of the following strings (case-sensitive!):
//
// 1. accumulate -- accumulate the training textons in the plain text
//    file specified by the --training-textons option (default is to
//    accumulate the training textons in training_textons.txt in the
//    current directory.
//
// 2. kmeans -- compute the universal textons from the training textons
//    using the K-means implementation in OpenCV.
//
//    The --training-textons option can be used to specify the input file
//    for the K-means and --universal-textons option can be used to
//    specify the output file. The defaults are to read from
//    training_textons.txt and write to universal_textons.mat (in the
//    current directory).
//
// 3. histogram -- compute the histograms for the training set. The
//    output is sent to the text file specified by the --histogram-file
//    option. It is a good idea to also supply the --histogram-name
//    option when saving training histograms from an MPEG. A good choice
//    of the entry's name would be the basename of the MPEG file sans
//    extension.
//
// 4. classify -- uses the universal textons and histograms produced by
//    the kmeans and histogram commands to classify the input images
//    streaming in.
#ifndef TT_ACCUMULATE_CMD
   #define TT_ACCUMULATE_CMD "accumulate"
#endif
#ifndef TT_KMEANS_CMD
   #define TT_KMEANS_CMD "kmeans"
#endif
#ifndef TT_HISTOGRAM_CMD
   #define TT_HISTOGRAM_CMD "histogram"
#endif
#ifndef TT_CLASSIFY_CMD
   #define TT_CLASSIFY_CMD "classify"
#endif

// For printing usage info.
//
// NOTE: the whitespace between each string literal and the adjacent
// macro name is required. Since C++11, a string literal immediately
// followed by an identifier (e.g., "{"TT_ACCUMULATE_CMD) is lexed as a
// user-defined literal suffix, so the macro is never expanded and the
// code fails to compile.
#ifndef TT_ACTIONS
   #define TT_ACTIONS ("{" TT_ACCUMULATE_CMD "|" TT_KMEANS_CMD "|" \
                            TT_HISTOGRAM_CMD "|" TT_CLASSIFY_CMD "}")
#endif
00742 
00743 } // end of local namespace encapsulating command line options section
00744 
00745 //--------------------- SIMULATION ENCAPSULATION ------------------------
00746 
00747 // The following helper class wraps around the ModelManager and
00748 // associated objects, providing a neatly encapsulated API for the main
00749 // program.
00750 namespace {
00751 
// Encapsulates the entire simulation: the model manager plus the
// components it coordinates, the program-specific command line options,
// and the dispatch table mapping action keywords to the member
// functions that implement them.
class TextonSimulation {
   // Core simulation objects; the model manager owns the components
   // below (they are registered as its subcomponents in the ctor).
   ModelManager model_manager ;
   nub::soft_ref<SimEventQueueConfigurator> configurator ;
   nub::soft_ref<StdBrain> brain ;
   nub::ref<SimInputFrameSeries> input_frame_series ;

   // Various command line options specific to this program
   OModelParam<std::string> training_option ;   // training textons file
   OModelParam<std::string> universal_option ;  // universal textons file
   OModelParam<std::string> hist_name_option ;  // histogram entry name
   OModelParam<std::string> hist_file_option ;  // training histograms file
   OModelParam<std::string> results_option ;    // classification results file

public :
   // Creates the model manager and the simulation's components.
   TextonSimulation(const std::string& model_name) ;
   // Installs program defaults and parses the command line; throws
   // std::runtime_error on a parse failure.
   void parse_command_line(int argc, const char* argv[]) ;
   // Dispatches to the action named by the non-option command line
   // argument; throws std::runtime_error for an unknown action.
   void run() ;
   ~TextonSimulation() ;

private :
   // The different actions performed by this program: each action
   // keyword maps to the pointer-to-member that implements it.
   typedef void (TextonSimulation::*Action)() ;
   typedef std::map<std::string, Action> ActionMap ;
   ActionMap action_map ;

   void accumulate_training_textons() ;   // "accumulate" action
   void compute_universal_textons() ;     // "kmeans" action
   void compute_training_histograms() ;   // "histogram" action
   void classify_input_images() ;         // "classify" action

   // Accessors for retrieving some of the command line arguments
   std::string training_textons_file()  {return training_option.getVal()  ;}
   std::string universal_textons_file() {return universal_option.getVal() ;}
   std::string histogram_name() {return hist_name_option.getVal() ;}
   std::string histogram_file() {return hist_file_option.getVal() ;}
   std::string results_file() {return results_option.getVal() ;}
} ;
00789 
// On instantiation, create the model manager and the simulation's
// various components.
//
// The components are constructed in the initializer list with the
// model manager as their owner, then registered as its subcomponents
// so it can start/stop them and route their command line options.
// Finally the action map is populated, associating each action keyword
// with the member function implementing it (see run()).
TextonSimulation::TextonSimulation(const std::string& model_name)
   : model_manager(model_name),
     configurator(new SimEventQueueConfigurator(model_manager)),
     brain(new StdBrain(model_manager)),
     input_frame_series(new SimInputFrameSeries(model_manager)),
     training_option(& OPT_TrainingTextons, & model_manager),
     universal_option(& OPT_UniversalTextons, & model_manager),
     hist_name_option(& OPT_HistogramName, & model_manager),
     hist_file_option(& OPT_HistogramFile, & model_manager),
     results_option(& OPT_ResultsFile, & model_manager)
{
   model_manager.addSubComponent(configurator) ;
   model_manager.addSubComponent(brain) ;
   model_manager.addSubComponent(input_frame_series) ;

   typedef TextonSimulation me ; // typing shortcut
   action_map[TT_ACCUMULATE_CMD] = & me::accumulate_training_textons ;
   action_map[TT_KMEANS_CMD]     = & me::compute_universal_textons ;
   action_map[TT_HISTOGRAM_CMD]  = & me::compute_training_histograms ;
   action_map[TT_CLASSIFY_CMD]   = & me::classify_input_images ;
}
00813 
00814 // TODO: Do we really need the single channel save raw maps option for
00815 // this texton training program? And how can we force the gist estimator
00816 // type to be always GistEstimatorTexton? This program doesn't make sense
00817 // for any other gist estimator.
00818 void TextonSimulation::parse_command_line(int argc, const char* argv[])
00819 {
00820    model_manager.setOptionValString(& OPT_SingleChannelSaveRawMaps, "true") ;
00821    model_manager.setOptionValString(& OPT_GistEstimatorType, "Texton") ;
00822    model_manager.setOptionValString(& OPT_NumOrientations, "6") ;
00823 
00824    model_manager.setOptionValString(& OPT_TrainingTextons,
00825                                     TT_DEFAULT_TRAINING_TEXTONS_FILE) ;
00826    model_manager.setOptionValString(& OPT_UniversalTextons,
00827                                     TT_DEFAULT_UNIVERSAL_TEXTONS_FILE) ;
00828 
00829    model_manager.setOptionValString(& OPT_HistogramName,
00830                                     TT_DEFAULT_TRAINING_HISTOGRAM_NAME) ;
00831    model_manager.setOptionValString(& OPT_HistogramFile,
00832                                     TT_DEFAULT_TRAINING_HISTOGRAMS_FILE) ;
00833 
00834    model_manager.setOptionValString(& OPT_ResultsFile,
00835                                     TT_DEFAULT_CLASSIFICATION_RESULTS_FILE) ;
00836 
00837    if (! model_manager.parseCommandLine(argc, argv, TT_ACTIONS, 1, 1))
00838       throw std::runtime_error("command line parse error") ;
00839 }
00840 
00841 // To run the simulation, we simply dispatch to the function
00842 // corresponding to the action (non-option) command line argument.
00843 void TextonSimulation::run()
00844 {
00845    std::string cmd(model_manager.getExtraArg(0)) ;
00846    ActionMap::iterator action = action_map.find(cmd) ;
00847    if (action == action_map.end())
00848       throw std::runtime_error(cmd + ": sorry, unknown action") ;
00849    (this->*(action->second))() ;
00850 }
00851 
00852 // Quick helper class to start and stop model manager (useful when
00853 // exceptions are thrown because destructor automatically stops the model
00854 // manager without requiring an explicit call to the stop method prior to
00855 // throwing the exception).
00856 class ModelManagerStarter {
00857    ModelManager& mgr ;
00858 public :
00859    ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;}
00860    ~ModelManagerStarter() {mgr.stop() ;}
00861 } ;
00862 
// This method implements the simulation's main loop for the "accumulate"
// action. The main loop which evolves the different components of the
// simulation. Prior to starting the main loop though, it configures the
// texton gist estimator's training callback, which is triggered at each
// step of the brain's evolution. The texton gist estimator passes the
// textons for the "current" input image to this callback, which then
// proceeds to accumulate the textons in the file specified by the
// --training-textons option.
//
// The code for the actual accumulation is in the TEXTON ACCUMULATION
// section of this file.
//
// NOTE(review): the implementation is currently disabled -- the LFATAL
// below fires unconditionally, making the breakage explicit. The
// original logic is preserved in the commented-out block that follows
// so it can be revived once fixed.
void TextonSimulation::accumulate_training_textons()
{
   ModelManagerStarter M(model_manager) ; // stops the manager on scope exit

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   ge->setTrainingHook(accumulate_textons) ;
   textons_accumulator::output_file(training_textons_file()) ;

   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ; // triggers training hook
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL induced abortion
      }
   }
   // */
}
00905 
00906 // The following method implements the "kmeans" action of this program
00907 // for clustering the training textons to obtain the 100 universal
00908 // textons.
00909 void TextonSimulation::compute_universal_textons()
00910 {
00911    LINFO("MVN: counting lines in %s", training_textons_file().c_str()) ;
00912    int num_rows = count_lines(training_textons_file()) ;
00913 
00914    LINFO("MVN: reading %d training textons from %s",
00915          num_rows, training_textons_file().c_str()) ;
00916    OpenCVMatrix training_textons =
00917       load_training_textons(training_textons_file(), num_rows) ;
00918 
00919    const int K = TT_NUM_UNIVERSAL_TEXTONS ;
00920    LINFO("MVN: doing K-means on training textons to get %d clusters", K) ;
00921    OpenCVMatrix universal_textons = kmeans(K, training_textons) ;
00922 
00923    LINFO("MVN: K-means done; saving universal textons to %s",
00924          universal_textons_file().c_str()) ;
00925    save_universal_textons(universal_textons, universal_textons_file()) ;
00926 }
00927 
// This method implements the "histogram" action of this program. Like
// the accumulate action, it implements a "main loop" for the simulation,
// evolving different components with each iteration. But rather than
// dipping into the GistEstimatorTexton's processing pipeline, it starts
// the Matlab engine, loads the universal textons and then uses the
// GistEstimatorTexton to obtain the histogram for each of the training
// images. These histograms are saved to the training histograms database
// specified by the --histogram-file option.
//
// NOTE(review): like the accumulate action, this implementation is
// currently disabled -- the LFATAL below fires unconditionally. The
// original logic is preserved in the commented-out block below.
void TextonSimulation::compute_training_histograms()
{
   ModelManagerStarter M(model_manager) ; // stops the manager on scope exit

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   Texton U = load_universal_textons(universal_textons_file()) ;
   ge->setUniversalTextons(& U) ;
   LINFO("MVN: loaded %d universal textons from %s",
         U.getHeight(), universal_textons_file().c_str()) ;

   int i = 1 ;
   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ;
         SeC<SimEventGistOutput> gist_out =
            event_queue->check<SimEventGistOutput>(brain.get(),
                                                   SEQ_UNMARKED | SEQ_MARK,
                                                   ge.get()) ;
         if (gist_out) // texton GE has a gist vector waiting to be picked up
            save_histogram(ge->getGist(), histogram_name() + to_string(i++),
                           histogram_file()) ;
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL induced abortion
      }
   }
   // */
}
00977 
// The following method implements this program's "classify" action. It
// reads the training histograms database and the universal textons and
// then uses a chi-square measure to compute the closest match for the
// input image.
//
// NOTE(review): like the other simulation-loop actions, this
// implementation is currently disabled -- the LFATAL below fires
// unconditionally. The original logic is preserved in the commented-out
// block below.
void TextonSimulation::classify_input_images()
{
   ModelManagerStarter M(model_manager) ; // stops the manager on scope exit

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   Texton U = load_universal_textons(universal_textons_file()) ;
   ge->setUniversalTextons(& U) ;
   LINFO("MVN: loaded %d universal textons from %s",
         U.getHeight(), universal_textons_file().c_str()) ;

   HistogramMap training_histograms =
      load_training_histograms(histogram_file()) ;
   LINFO("MVN: loaded %d training histograms from %s",
         int(training_histograms.size()), histogram_file().c_str()) ;

   int i = 1 ;
   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ;
         SeC<SimEventGistOutput> gist_out =
            event_queue->check<SimEventGistOutput>(brain.get(),
                                                   SEQ_UNMARKED | SEQ_MARK,
                                                   ge.get()) ;
         if (gist_out) // texton GE has a gist vector waiting to be picked up
            classify_image(std::make_pair(histogram_name() + to_string(i++),
                                          ge->getGist()),
                           training_histograms, results_file()) ;
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL induced abortion
      }
   }
   // */
}
01029 
// Do we really not have to delete the configurator, brain and input
// frame series? If it turns out we do, this empty destructor will have
// to be filled out with the necessary delete calls...
//
// NOTE(review): the members are held via nub::ref/nub::soft_ref smart
// references, so presumably no explicit deletes are needed here --
// confirm against the nub reference-counting documentation.
TextonSimulation::~TextonSimulation() {}
01034 
01035 } // end of local namespace encapsulating simulation encapsulation section
01036 
01037 //------------------------------- MAIN ----------------------------------
01038 
01039 #ifdef HAVE_OPENCV
01040 
01041 int main(int argc, const char* argv[])
01042 {
01043    MYLOGVERB = LOG_INFO ; // suppress debug messages
01044    try
01045    {
01046       TextonSimulation S("train-texton Model") ;
01047       S.parse_command_line(argc, argv) ;
01048       S.run() ;
01049    }
01050    catch (std::exception& e)
01051    {
01052       LFATAL("%s", e.what()) ;
01053       return 1 ;
01054    }
01055    return 0 ;
01056 }
01057 
#else

// Fallback entry point for builds without OpenCV: the kmeans step
// depends on OpenCV, so the program just reports the missing dependency
// and exits with a failure status.
int main()
{
   LINFO("Sorry, this program needs OpenCV.") ;
   return 1 ;
}

#endif
01067 
01068 //-----------------------------------------------------------------------
01069 
01070 /* So things look consistent in everyone's emacs... */
01071 /* Local Variables: */
01072 /* indent-tabs-mode: nil */
01073 /* End: */
Generated on Sun May 8 08:40:39 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3