/*!
   @file Gist/train-texton.C create the prototypical universal textons
   "database" from the training data
*/

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
// University of Southern California (USC) and the iLab at USC.         //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: mviswana usc edu
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Gist/train-texton.C $
// $Id: train-texton.C 14605 2011-03-15 02:25:06Z dparks $
//

//------------------------------ HEADERS --------------------------------

#include "Image/OpenCVUtil.H" // must be first to avoid conflicting defs of int64, uint64

// Gist specific headers
#include "Neuro/GistEstimatorTexton.H"

// Other INVT headers
#include "Neuro/StdBrain.H"
#include "Neuro/NeuroOpts.H"
#include "Neuro/NeuroSimEvents.H"

#include "Media/SimFrameSeries.H"
#include "Media/MediaOpts.H"

#include "Simulation/SimEventQueue.H"
#include "Simulation/SimEventQueueConfigurator.H"

#include "Channels/ChannelOpts.H"
#include "Component/ModelManager.H"
#include "Component/ModelOptionDef.H"

#include "Image/Point2D.H"

#include "nub/ref.h"

#ifndef HAVE_OPENCV // fake OpenCV API so as to not break builds
namespace {

struct CvMat {int rows, cols, type ;} ;

inline CvMat* cvCreateMat(int, int, int) {return 0 ;}
inline void   cvZero(CvMat*) {}
inline void   cvReleaseMat(CvMat**) {}
inline double cvmGet(CvMat*, int, int) {return 0 ;}
inline void   cvmSet(CvMat*, int, int, double) {}
inline int    cvTermCriteria(int, int, double) {return 0 ;}
inline void   cvKMeans2(CvMat*, int, CvMat*, int) {}

#define CV_32FC1 0
#define CV_32SC1 0
inline int CV_MAT_TYPE(int) {return 0 ;}
#define CV_MAT_ELEM(matrix, type, row, col) (type(0))

#define CV_TERMCRIT_EPS  0
#define CV_TERMCRIT_ITER 0

}

#endif // OpenCV availability check

// Standard C++ headers
#include <fstream>
#include <sstream>
#include <ios>
#include <numeric>
#include <algorithm>
#include <functional>
#include <map>
#include <vector>
#include <iterator>
#include <stdexcept>
#include <utility>
#include <limits>
#include <cmath>

//-------------------------- UTILITY ROUTINES ---------------------------

namespace {

// Convenient (but perhaps not the most efficient) helper to convert
// various data types to strings.
//
// DEVNOTE: Works as long as type T defines an operator << that writes to
// an ostream.
template<typename T>
std::string to_string(const T& t)
{
   std::ostringstream str ;
   str << t ;
   return str.str() ;
}

// Count the number of lines in a file (wc -l)
int count_lines(const std::string& file_name)
{
   int n = -1 ; // because EOF is read after final \n (1 extra iter. of loop)
   std::ifstream ifs(file_name.c_str()) ;

   std::string dummy ;
   while (ifs) {
      getline(ifs, dummy) ;
      ++n ;
   }
   return n ;
}

// Returns true if a floating point number is near zero
bool is_zero(double d)
{
   return std::fabs(d) <= std::numeric_limits<double>::epsilon() ;
}

} // end of local namespace encapsulating utility routines section

//------------------------ TEXTON ACCUMULATION --------------------------

// Given an input image, GistEstimatorTexton works by performing
// K-nearest neighbour search on the textons in the input image. Each of
// the input textons is matched against the database of "universal"
// textons, and the frequency of occurrence of the universal textons in
// the input image is used as the basis of image classification.
//
// The universal textons database is just a collection of the 100 most
// frequently occurring textons in the training set. It is stored as a
// 100x36 matrix (the number 36 comes from 6x3x2, where 6 is the number
// of orientations input images are filtered at, 3 is the number of
// scales for each orientation, and 2 is due to the even and odd filters
// applied at each scale and orientation; refer to the Renninger-Malik
// paper for further details).
//
// This program has an operational mode that spits out a plain text file
// containing the universal textons for a given set of input images by
// first accumulating all the textons and then performing K-means
// clustering on them. To get at the training set's textons, we need to
// "hook" into the GistEstimatorTexton's processing pipeline.
//
// The accumulated textons are stored in a plain text file that is later
// loaded as the data matrix for the K-means clustering procedure.
//
// This section of code takes care of accumulating the textons for the
// training set in the above-mentioned plain text file. The code for
// performing K-means analysis on the accumulated textons is in the next
// section.
namespace {

// A texton is simply the vector of filter responses for a given pixel.
// That is, if we apply 36 filters to an input image, we will get 36
// Images as the filtration results. The texton for pixel (i,j) will be
// the vector of 36 numbers formed by taking pixel (i,j) from each of the
// 36 Images in the filtration results.
//
// Rather than implement some custom object to represent a texton and a
// collection of textons, we simply piggyback off INVT's Image<T> class,
// which is used to store the textons for an entire image.
typedef GistEstimatorTexton::ImageType Texton ;
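
// Purely illustrative sketch (this helper is ours and is not used
// anywhere in this program): given the filter-response Images for one
// input image, build the texton matrix in which row p holds the
// NUM_FILTERS responses for pixel p. GistEstimatorTexton performs the
// equivalent computation internally before handing the textons to the
// training hook set up below; we assume here that all response Images
// share the input image's dimensions and that the vector holds exactly
// NUM_FILTERS (i.e., 36) entries.
Texton texton_matrix_sketch(const std::vector<Texton>& filter_responses)
{
   const int W = filter_responses[0].getWidth() ;
   const int H = filter_responses[0].getHeight() ;
   const int N = int(filter_responses.size()) ; // should be NUM_FILTERS

   Texton textons(N, W * H, ZEROS) ; // one row per pixel, one column per filter
   for (int y = 0; y < H; ++y)
      for (int x = 0; x < W; ++x)
         for (int f = 0; f < N; ++f)
            textons.setVal(f, y * W + x, filter_responses[f].getVal(x, y)) ;
   return textons ;
}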

// Quick wrapper around an output file used to store the training
// textons. This file is populated during the texton accumulation phase
// and then loaded as the data matrix for the K-means computation.
class textons_accumulator {
   static std::string out_file ;

   textons_accumulator() ; // private to disallow instantiation
   ~textons_accumulator() ;
public :
   static void output_file(const std::string& file_name) ;
   static void write(const Texton&) ;
} ;

// Static data member for storing the training textons file name
// persistently across multiple invocations of the GistEstimatorTexton's
// training hook.
std::string textons_accumulator::out_file ;

// The GistEstimatorTexton client must set the above variable
// appropriately prior to setting up the GistEstimatorTexton training
// hook.
void textons_accumulator::output_file(const std::string& file_name)
{
   out_file = file_name ;
}

// The following function is meant to be used by the GistEstimatorTexton
// training hook. It simply appends the texton Image passed to it to the
// output file row by row.
//
// DEVNOTE: We could open the output file once (i.e., in the previous
// function) and use that object to avoid reopening (by using a static
// ostream data member rather than a static string). However, if the
// program were to somehow crash halfway through, the training textons
// output file would be left in an inconsistent state, and rerunning the
// program could append data to an already inconsistent dataset, which
// would only make things worse.
//
// Thus, we choose to open and close the output file each time the
// GistEstimatorTexton training hook is triggered. (Of course, if the
// program crashes while this function is executing, then all bets are
// off; the training textons file's inconsistency will be unavoidable in
// this case.)
void textons_accumulator::write(const Texton& textons)
{
   if (out_file.empty())
      throw std::runtime_error("textons accumulator output file "
                               "not specified") ;

   std::ofstream ofs(out_file.c_str(), std::ios::out | std::ios::app) ;
   for (int y = 0; y < textons.getHeight(); ++y) {
      for (int x = 0; x < textons.getWidth(); ++x)
         ofs << textons.getVal(x, y) << ' ' ;
      ofs << '\n' ;
   }
}

// The following function is the callback for the GistEstimatorTexton's
// training hook. The gist estimator object will pass this function an
// Image that serves as the current input image's textons. This function
// simply shoves that texton Image into the accumulator defined above.
void accumulate_textons(const Texton& textons)
{
   textons_accumulator::write(textons) ;
}

} // end of local namespace encapsulating texton accumulation section

//------------------- UNIVERSAL TEXTONS COMPUTATION ---------------------

// Once the textons have been accumulated from the filtration results of
// each of the input images, we compute the universal textons using the
// K-means implementation available in OpenCV.
//
// DEVNOTE: Renninger and Malik used the K-means implementation in the
// Netlab (Matlab) toolbox. Unfortunately, Matlab was unable to handle
// the volume of data being passed to it for the gist models comparison
// project at iLab (for which this implementation was developed).
namespace {

// Crude encapsulation of OpenCV matrices
class OpenCVMatrix {
   CvMat* matrix ;
public :
   OpenCVMatrix(int num_rows, int num_cols, int type) ;
   OpenCVMatrix(CvMat*) ;
   ~OpenCVMatrix() ;

   int num_rows() const {return matrix->rows ;}
   int num_cols() const {return matrix->cols ;}
   int type()     const {return CV_MAT_TYPE(matrix->type) ;}

   template<typename T> // T must match matrix->type (float for CV_32FC1, etc.)
   T get(int i, int j) const {return CV_MAT_ELEM(*matrix, T, i, j) ;}

   operator CvMat*() const {return matrix ;} // auto conv. (usually a bad idea)
} ;

OpenCVMatrix::OpenCVMatrix(int num_rows, int num_cols, int type)
   : matrix(cvCreateMat(num_rows, num_cols, type))
{
   if (! matrix)
      throw std::runtime_error("unable to create OpenCV matrix") ;
}

OpenCVMatrix::OpenCVMatrix(CvMat* M)
   : matrix(M)
{
   if (! matrix)
      throw std::runtime_error("cannot create empty/null matrix") ;
}

OpenCVMatrix::~OpenCVMatrix()
{
   cvReleaseMat(& matrix) ;
}

// The following function reads the training textons into an OpenCV
// matrix. It must know how many lines the training textons file has.
CvMat* load_training_textons(const std::string& file_name, int num_lines)
{
   CvMat* M =
      cvCreateMat(num_lines, GistEstimatorTexton::NUM_FILTERS, CV_32FC1) ;

   double d ;
   std::ifstream ifs(file_name.c_str()) ;
   for (int i = 0; i < num_lines; ++i)
      for (int j = 0; j < int(GistEstimatorTexton::NUM_FILTERS); ++j) {
         if (! ifs) {
            cvReleaseMat(& M) ;
            throw std::runtime_error(file_name + ": out of data?!?") ;
         }
         ifs >> d ;
         cvmSet(M, i, j, d) ;
      }

   return M ;
}

// OpenCV's K-means implementation returns cluster assignments. But we
// need the cluster centroids. This function takes the data matrix and
// cluster assignments and returns the K centroids.
CvMat* compute_centroids(int K, const OpenCVMatrix& data,
                         const OpenCVMatrix& cluster_assignments)
{
   CvMat* centroids = cvCreateMat(K, data.num_cols(), data.type()) ;
   cvZero(centroids) ;

   std::vector<int> cluster_counts(K) ;
   std::fill(cluster_counts.begin(), cluster_counts.end(), 0) ;

   for (int i = 0; i < data.num_rows(); ++i)
   {
      int C = cluster_assignments.get<int>(i, 0) ;
      ++cluster_counts[C] ;

      // Compute sum of C-th centroid and i-th row
      for (int j = 0; j < data.num_cols(); ++j)
         cvmSet(centroids, C, j,
                cvmGet(centroids, C, j) + data.get<float>(i, j)) ;
   }

   // Compute the K centroids by averaging the totals accumulated in the
   // centroids matrix using the cluster counts. (Empty clusters are left
   // at zero to avoid dividing by a zero count.)
   for (int C = 0; C < K; ++C)
   {
      if (cluster_counts[C] == 0)
         continue ;
      for (int j = 0; j < data.num_cols(); ++j)
         cvmSet(centroids, C, j,
                cvmGet(centroids, C, j) / cluster_counts[C]) ;
   }

   return centroids ;
}

// K-means parameters
#ifndef TT_KMEANS_ITERATIONS
#define TT_KMEANS_ITERATIONS (100)
#endif
#ifndef TT_KMEANS_PRECISION
#define TT_KMEANS_PRECISION (.01)
#endif

// This function performs K-means clustering on the supplied data matrix
// and returns the cluster centers.
CvMat* kmeans(int K, const OpenCVMatrix& data)
{
   OpenCVMatrix cluster_assignments(data.num_rows(), 1, CV_32SC1) ;

   LINFO("MVN: computing K-means cluster assignments with OpenCV") ;
   cvKMeans2(data, K, cluster_assignments,
             cvTermCriteria(CV_TERMCRIT_EPS + CV_TERMCRIT_ITER,
                            TT_KMEANS_ITERATIONS, TT_KMEANS_PRECISION)) ;

   LINFO("MVN: cluster assignments done; computing centroids...") ;
   return compute_centroids(K, data, cluster_assignments) ;
}

// Write the universal textons, row by row, to a plain text file.
void save_universal_textons(const OpenCVMatrix& universal_textons,
                            const std::string& file_name)
{
   std::ofstream ofs(file_name.c_str()) ;
   for (int i = 0; i < universal_textons.num_rows(); ++i) {
      for (int j = 0; j < universal_textons.num_cols(); ++j)
         ofs << universal_textons.get<float>(i, j) << ' ' ;
      ofs << '\n' ;
   }
}

// Read the universal textons from a plain text file into an Image<T>
Texton load_universal_textons(const std::string& file_name)
{
   const int M = count_lines(file_name) ;
   const int N = GistEstimatorTexton::NUM_FILTERS ;
   Texton U(N, M, ZEROS) ;

   float f ;
   std::ifstream ifs(file_name.c_str()) ;
   for (int j = 0; j < M; ++j)
      for (int i = 0; i < N; ++i) {
         if (! ifs)
            throw std::runtime_error(file_name + ": out of data?!?") ;
         ifs >> f ;
         U.setVal(i, j, f) ;
      }

   return U ;
}

// The training textons are agglomerated into the following number of
// clusters.
//
// DEVNOTE: Although not used in this section, it makes most sense to
// define this symbol here. It does not fit well into the other sections
// of this file.
#ifndef TT_NUM_UNIVERSAL_TEXTONS
#define TT_NUM_UNIVERSAL_TEXTONS 100
#endif

} // end of local namespace encapsulating universal textons section

//------------------- TRAINING HISTOGRAM PROCESSING ---------------------

// Training is a two-step process: first, we use K-means to cluster the
// training set's textons to create the universal textons. Then, we
// collect the histograms counting the universal textons in the training
// images. The universal textons and training set's histogram "database"
// are both used for image classification.
namespace {

// Some useful types for dealing with texton histograms
typedef Image<double> Histogram ;
typedef std::map<std::string, Histogram> HistogramMap ;
typedef HistogramMap::value_type HistogramMapEntry ;
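
// Purely illustrative sketch (this helper is ours and is not called
// anywhere in this program): given an image's textons T (one row per
// pixel) and the universal textons U (one row per universal texton),
// count, for each universal texton, how many of the image's textons lie
// nearest to it under the Euclidean distance. GistEstimatorTexton
// computes the equivalent 100-bin histogram internally; this merely
// spells out the idea behind the "gist vector" saved below.
Histogram texton_histogram_sketch(const Texton& T, const Texton& U)
{
   Histogram H(U.getHeight(), 1, ZEROS) ; // one bin per universal texton
   for (int t = 0; t < T.getHeight(); ++t)
   {
      int best = 0 ;
      double best_distance = std::numeric_limits<double>::max() ;
      for (int u = 0; u < U.getHeight(); ++u)
      {
         double distance = 0 ;
         for (int f = 0; f < T.getWidth(); ++f) {
            const double diff = T.getVal(f, t) - U.getVal(f, u) ;
            distance += diff * diff ;
         }
         if (distance < best_distance) {
            best_distance = distance ;
            best = u ;
         }
      }
      H.setVal(best, 0, H.getVal(best, 0) + 1) ;
   }
   return H ;
}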

// This function appends a training image's histogram to the training
// histograms database file under the supplied "entry" name. As we did in
// the textons accumulation function, in order to minimize possible
// inconsistencies in this database, we choose to open and close the
// training histograms file with each invocation of this helper rather
// than keeping a persistent ostream object around (which would obviate
// the need for repeated file open/close operations).
void save_histogram(const Histogram& histogram,
                    const std::string& hist_name,
                    const std::string& file_name)
{
   LINFO("MVN: saving histogram %s to %s",
         hist_name.c_str(), file_name.c_str()) ;
   std::ofstream ofs(file_name.c_str(), std::ios::out | std::ios::app) ;
   ofs << hist_name << ' ' ;
   for (int y = 0; y < histogram.getHeight(); ++y)   // should be just one row
      for (int x = 0; x < histogram.getWidth(); ++x) // should be 100 columns
         ofs << histogram.getVal(x, y) << ' ' ;
   ofs << '\n' ;
}

// The following function reads the training histograms "database," which
// is a plain text file containing one histogram per line. Each line
// starts with the name of the training histogram, followed by the
// hundred numbers making up that histogram.
HistogramMap load_training_histograms(const std::string& file_name)
{
   HistogramMap histograms ;

   std::ifstream ifs(file_name.c_str()) ;
   for(;;)
   {
      std::string str ;
      std::getline(ifs, str) ;
      if (! ifs || str.empty())
         break ;
      std::istringstream line(str) ;

      std::string histogram_name ;
      line >> histogram_name ;

      Histogram H(TT_NUM_UNIVERSAL_TEXTONS, 1, ZEROS) ;
      double d ; int i = 0 ;
      while (line >> d)
         H.setVal(i++, 0, d) ;

      histograms.insert(std::make_pair(histogram_name, H)) ;
   }

   return histograms ;
}

} // end of local namespace encapsulating training histograms section

//----------------------- IMAGE CLASSIFICATION --------------------------

// Given the histograms for an input image and each of the training
// images, we can tell which training image the input image matches most
// closely by computing the chi-square distance between the input
// image's histogram and the histograms of each of the training images.
namespace {

// When computing the chi-square distance between the input image's
// histogram and that of each of the training images, we want to be able
// to tell which training image is the closest. For that, we use the
// following pair that "maps" a training histogram name to its
// corresponding distance.
typedef std::pair<std::string, double> HistogramDistance ;

// To sort histogram distances, we want to compare the chi-square
// measures rather than their names.
bool chi_square_cmp(const HistogramDistance& L, const HistogramDistance& R)
{
   return L.second < R.second ;
}

// But when writing classification results, we're only interested in the
// matching training image's name and not really in the chi-square
// distance between its histogram and that of the input image.
std::ostream& operator<<(std::ostream& os, const HistogramDistance& D)
{
   return os << D.first ;
}

// Given an entry from the training histograms map, the following
// function object returns the chi-square distance between the input
// image's histogram and the training image's histogram.
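//
// For reference, the distance computed below between two histograms L
// and R (both 1xN, with N = 100 here) is the usual chi-square measure:
//
//    chi_square(L, R) = 1/2 * sum_i (L[i] - R[i])^2 / (L[i] + R[i])
//
// where bins in which the two histograms are identical (or both empty)
// contribute nothing to the sum.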
class chi_square {
   const Histogram& input ;
   double distance(const Histogram&, const Histogram&) const ;
public :
   chi_square(const Histogram& H) ;
   HistogramDistance operator()(const HistogramMapEntry& E) const {
      return std::make_pair(E.first, distance(input, E.second)) ;
   }
} ;

chi_square::chi_square(const Histogram& H)
   : input(H)
{}

double chi_square::distance(const Histogram& L, const Histogram& R) const
{
   const int n = L.getWidth() ; // both should have same dimensions (100x1)
   double sum = 0 ;
   for (int i = 0; i < n; ++i)
   {
      double l = L.getVal(i, 0) ;
      double r = R.getVal(i, 0) ;
      double l_minus_r = l - r ;
      double l_plus_r  = l + r ;
      if (is_zero(l_minus_r) || is_zero(l_plus_r))
         continue ;
      sum += (l_minus_r * l_minus_r)/l_plus_r ;
   }
   return sum/2 ;
}

// This function computes the chi-square distance between the input
// image's histogram and the histograms of the training images and then
// writes the top five matches to the specified results file.
//
// DEVNOTE: To output the top five matches to the results file, we ought
// to be able to use the std::copy algorithm in conjunction with
// std::ostream_iterator<HistogramDistance>. Unfortunately, that does not
// work here: ostream_iterator invokes operator<< from within namespace
// std, so it never finds the operator<< for HistogramDistance defined in
// this file's local namespace (and argument-dependent lookup does not
// help, since the associated namespace of std::pair is std). Hence the
// explicit loop below.
void classify_image(const HistogramMapEntry& input,
                    const HistogramMap& training_histograms,
                    const std::string& results_file)
{
   std::vector<HistogramDistance> chi_square_distances ;
   std::transform(training_histograms.begin(), training_histograms.end(),
                  std::back_inserter(chi_square_distances),
                  chi_square(input.second)) ;
   std::sort(chi_square_distances.begin(), chi_square_distances.end(),
             chi_square_cmp) ;

   std::ofstream ofs(results_file.c_str(), std::ios::out | std::ios::app) ;
   ofs << input.first << ' ' ;
   //std::copy(chi_square_distances.begin(), chi_square_distances.begin() + 5,
             //std::ostream_iterator<HistogramDistance>(ofs, " ")) ; // ERROR!
   for (unsigned int i = 0; i < chi_square_distances.size() && i < 5; ++i)
      ofs << chi_square_distances[i] << ' ' ;
   ofs << '\n' ;
}

} // end of local namespace encapsulating image classification section

//----------------------- COMMAND LINE OPTIONS --------------------------

// This program has four distinct phases/modes of operation, each one
// specified via a suitable non-option command line argument.
// Additionally, it supports several command line options to allow users
// to tweak various parameters such as the name of the universal textons
// file, the training histograms database, and so on.
namespace {

const ModelOptionCateg MOC_TEXTONS = {
   MOC_SORTPRI_3,
   "Options specific to the Renninger-Malik textons program",
} ;

// In the training textons accumulation phase, we collect all the textons
// of the input images into a plain text file.
#ifndef TT_DEFAULT_TRAINING_TEXTONS_FILE
#define TT_DEFAULT_TRAINING_TEXTONS_FILE "training_textons.txt"
#endif

const ModelOptionDef OPT_TrainingTextons = {
   MODOPT_ARG_STRING, "TrainingTextons", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the file where training textons\n"
   "should be accumulated or read from. This is a plain text file containing\n"
   "the training textons matrix that will be fed into the K-means procedure\n"
   "during the texton training phase. Each line of this file will contain a\n"
   "row of training textons.\n",
   "training-textons", '\0', "training-textons-file",
   TT_DEFAULT_TRAINING_TEXTONS_FILE,
} ;

// In the texton training phase, we use the accumulated training textons
// and perform K-means on them to produce the universal textons.
#ifndef TT_DEFAULT_UNIVERSAL_TEXTONS_FILE
#define TT_DEFAULT_UNIVERSAL_TEXTONS_FILE "universal_textons.txt"
#endif

const ModelOptionDef OPT_UniversalTextons = {
   MODOPT_ARG_STRING, "UniversalTextons", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the file in which the universal\n"
   "textons are (or are to be) stored. This is a plain text file containing\n"
   "the universal_textons matrix that is used for image classification.\n",
   "universal-textons", '\0', "universal-textons-file",
   TT_DEFAULT_UNIVERSAL_TEXTONS_FILE,
} ;

// In the second phase of texton training, we count the universal textons
// in the training images and store them in a training histograms
// "database" under the specified "entry name."
//
// DEVNOTE: The default value for this option (i.e., --histogram-name) is
// not very useful. This particular option really ought to be specified
// on the command line.
#ifndef TT_DEFAULT_TRAINING_HISTOGRAM_NAME
#define TT_DEFAULT_TRAINING_HISTOGRAM_NAME "training_image"
#endif

const ModelOptionDef OPT_HistogramName = {
   MODOPT_ARG_STRING, "HistogramName", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the \"root\" name of the histogram entry in\n"
   "the training histograms database. The histogram number will be\n"
   "appended to this \"root\" name. The training histograms database\n"
   "is a plain text file containing one histogram entry per line. The\n"
   "first field specifies the name plus number of the entry (e.g.,\n"
   "foo_1, foo_2, bar_1, and so on). The remaining fields are simply the\n"
   "hundred numbers making up the image's universal textons histogram.\n\n"
   "In classification mode, this option specifies the name of the input\n"
   "image's histogram that is written to the results file.\n",
   "histogram-name", '\0', "histogram-name-root",
   TT_DEFAULT_TRAINING_HISTOGRAM_NAME,
} ;

#ifndef TT_DEFAULT_TRAINING_HISTOGRAMS_FILE
#define TT_DEFAULT_TRAINING_HISTOGRAMS_FILE "training_histograms.txt"
#endif

const ModelOptionDef OPT_HistogramFile = {
   MODOPT_ARG_STRING, "HistogramFile", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the training histograms database,\n"
   "a plain text file containing one histogram entry per line. The\n"
   "first field specifies the name plus number of the entry (e.g.,\n"
   "foo_1, foo_2, bar_1, and so on). The remaining fields are simply the\n"
   "hundred numbers making up the image's universal textons histogram.\n",
   "histogram-file", '\0', "training-histograms-file",
   TT_DEFAULT_TRAINING_HISTOGRAMS_FILE,
} ;

// In image classification mode, we write the results to the following
// file.
#ifndef TT_DEFAULT_CLASSIFICATION_RESULTS_FILE
#define TT_DEFAULT_CLASSIFICATION_RESULTS_FILE "texton_classifications.txt"
#endif

const ModelOptionDef OPT_ResultsFile = {
   MODOPT_ARG_STRING, "ResultsFile", & MOC_TEXTONS, OPTEXP_CORE,
   "This option specifies the name of the classification results file,\n"
   "a plain text file containing one result entry per line. The first\n"
   "field specifies the name of the input image plus the number of the\n"
   "entry (e.g., foo_1, foo_2, bar_1, and so on). Then come the names of\n"
   "the top five matching images from the training set.\n",
   "results-file", '\0', "classification-results-file",
   TT_DEFAULT_CLASSIFICATION_RESULTS_FILE,
} ;

// The different operational modes of this program must be specified as
// the one and only non-option command line argument. This "action"
// command must be one of the following strings (case-sensitive!):
//
// 1. accumulate -- accumulate the training textons in the plain text
//    file specified by the --training-textons option (the default is to
//    accumulate the training textons in training_textons.txt in the
//    current directory).
//
// 2. kmeans -- compute the universal textons from the training textons
//    using the K-means implementation in OpenCV.
//
//    The --training-textons option can be used to specify the input file
//    for the K-means and the --universal-textons option can be used to
//    specify the output file. The defaults are to read from
//    training_textons.txt and write to universal_textons.txt (in the
//    current directory).
//
// 3. histogram -- compute the histograms for the training set. The
//    output is sent to the text file specified by the --histogram-file
//    option. It is a good idea to also supply the --histogram-name
//    option when saving training histograms from an MPEG. A good choice
//    for the entry's name would be the basename of the MPEG file sans
//    extension.
//
// 4. classify -- uses the universal textons and histograms produced by
//    the kmeans and histogram commands to classify the input images
//    streaming in.
//
// Example invocations of the four actions are sketched right below.
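//
// For concreteness, a typical sequence of invocations might look like
// the following. These command lines are purely illustrative (our own,
// not from the original documentation): the movie names and frame range
// are made up, and we assume the standard INVT --in and --input-frames
// media options for specifying the input frame series; only the trailing
// action argument and the texton-specific options shown here are defined
// by this program.
//
//    train-texton --in=training.mpg --input-frames=0-1000@200ms accumulate
//    train-texton kmeans
//    train-texton --in=training.mpg --input-frames=0-1000@200ms \
//                 --histogram-name=training histogram
//    train-texton --in=test.mpg --input-frames=0-100@200ms \
//                 --histogram-name=test classify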
#ifndef TT_ACCUMULATE_CMD
#define TT_ACCUMULATE_CMD "accumulate"
#endif
#ifndef TT_KMEANS_CMD
#define TT_KMEANS_CMD "kmeans"
#endif
#ifndef TT_HISTOGRAM_CMD
#define TT_HISTOGRAM_CMD "histogram"
#endif
#ifndef TT_CLASSIFY_CMD
#define TT_CLASSIFY_CMD "classify"
#endif

// For printing usage info
#ifndef TT_ACTIONS
#define TT_ACTIONS ("{"TT_ACCUMULATE_CMD"|"TT_KMEANS_CMD"|"\
                    TT_HISTOGRAM_CMD"|"TT_CLASSIFY_CMD"}")
#endif

} // end of local namespace encapsulating command line options section

//--------------------- SIMULATION ENCAPSULATION ------------------------

// The following helper class wraps around the ModelManager and
// associated objects, providing a neatly encapsulated API for the main
// program.
namespace {

class TextonSimulation {
   ModelManager model_manager ;
   nub::soft_ref<SimEventQueueConfigurator> configurator ;
   nub::soft_ref<StdBrain> brain ;
   nub::ref<SimInputFrameSeries> input_frame_series ;

   // Various command line options specific to this program
   OModelParam<std::string> training_option ;
   OModelParam<std::string> universal_option ;
   OModelParam<std::string> hist_name_option ;
   OModelParam<std::string> hist_file_option ;
   OModelParam<std::string> results_option ;

public :
   TextonSimulation(const std::string& model_name) ;
   void parse_command_line(int argc, const char* argv[]) ;
   void run() ;
   ~TextonSimulation() ;

private :
   // The different actions performed by this program
   typedef void (TextonSimulation::*Action)() ;
   typedef std::map<std::string, Action> ActionMap ;
   ActionMap action_map ;

   void accumulate_training_textons() ;
   void compute_universal_textons() ;
   void compute_training_histograms() ;
   void classify_input_images() ;

   // Accessors for retrieving some of the command line arguments
   std::string training_textons_file()  {return training_option.getVal() ;}
   std::string universal_textons_file() {return universal_option.getVal() ;}
   std::string histogram_name()         {return hist_name_option.getVal() ;}
   std::string histogram_file()         {return hist_file_option.getVal() ;}
   std::string results_file()           {return results_option.getVal() ;}
} ;

// On instantiation, create the model manager and the simulation's
// various components.
TextonSimulation::TextonSimulation(const std::string& model_name)
   : model_manager(model_name),
     configurator(new SimEventQueueConfigurator(model_manager)),
     brain(new StdBrain(model_manager)),
     input_frame_series(new SimInputFrameSeries(model_manager)),
     training_option(& OPT_TrainingTextons, & model_manager),
     universal_option(& OPT_UniversalTextons, & model_manager),
     hist_name_option(& OPT_HistogramName, & model_manager),
     hist_file_option(& OPT_HistogramFile, & model_manager),
     results_option(& OPT_ResultsFile, & model_manager)
{
   model_manager.addSubComponent(configurator) ;
   model_manager.addSubComponent(brain) ;
   model_manager.addSubComponent(input_frame_series) ;

   typedef TextonSimulation me ; // typing shortcut
   action_map[TT_ACCUMULATE_CMD] = & me::accumulate_training_textons ;
   action_map[TT_KMEANS_CMD]     = & me::compute_universal_textons ;
   action_map[TT_HISTOGRAM_CMD]  = & me::compute_training_histograms ;
   action_map[TT_CLASSIFY_CMD]   = & me::classify_input_images ;
}

// TODO: Do we really need the single channel save raw maps option for
// this texton training program? And how can we force the gist estimator
// type to be always GistEstimatorTexton? This program doesn't make sense
// for any other gist estimator.
void TextonSimulation::parse_command_line(int argc, const char* argv[])
{
   model_manager.setOptionValString(& OPT_SingleChannelSaveRawMaps, "true") ;
   model_manager.setOptionValString(& OPT_GistEstimatorType, "Texton") ;
   model_manager.setOptionValString(& OPT_NumOrientations, "6") ;

   model_manager.setOptionValString(& OPT_TrainingTextons,
                                    TT_DEFAULT_TRAINING_TEXTONS_FILE) ;
   model_manager.setOptionValString(& OPT_UniversalTextons,
                                    TT_DEFAULT_UNIVERSAL_TEXTONS_FILE) ;

   model_manager.setOptionValString(& OPT_HistogramName,
                                    TT_DEFAULT_TRAINING_HISTOGRAM_NAME) ;
   model_manager.setOptionValString(& OPT_HistogramFile,
                                    TT_DEFAULT_TRAINING_HISTOGRAMS_FILE) ;

   model_manager.setOptionValString(& OPT_ResultsFile,
                                    TT_DEFAULT_CLASSIFICATION_RESULTS_FILE) ;

   if (! model_manager.parseCommandLine(argc, argv, TT_ACTIONS, 1, 1))
      throw std::runtime_error("command line parse error") ;
}

// To run the simulation, we simply dispatch to the function
// corresponding to the action (non-option) command line argument.
void TextonSimulation::run()
{
   std::string cmd(model_manager.getExtraArg(0)) ;
   ActionMap::iterator action = action_map.find(cmd) ;
   if (action == action_map.end())
      throw std::runtime_error(cmd + ": sorry, unknown action") ;
   (this->*(action->second))() ;
}

// Quick helper class to start and stop the model manager. It is useful
// when exceptions are thrown because its destructor automatically stops
// the model manager without requiring an explicit call to the stop
// method prior to throwing the exception.
class ModelManagerStarter {
   ModelManager& mgr ;
public :
   ModelManagerStarter(ModelManager& m) : mgr(m) {mgr.start() ;}
   ~ModelManagerStarter() {mgr.stop() ;}
} ;

// This method implements the simulation's main loop for the "accumulate"
// action, i.e., the loop that evolves the different components of the
// simulation. Prior to starting the main loop, it configures the texton
// gist estimator's training callback, which is triggered at each step of
// the brain's evolution. The texton gist estimator passes the textons
// for the "current" input image to this callback, which then proceeds to
// accumulate the textons in the file specified by the --training-textons
// option.
//
// The code for the actual accumulation is in the TEXTON ACCUMULATION
// section of this file.
void TextonSimulation::accumulate_training_textons()
{
   ModelManagerStarter M(model_manager) ;

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   ge->setTrainingHook(accumulate_textons) ;
   textons_accumulator::output_file(training_textons_file()) ;

   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ; // triggers training hook
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL-induced abort
      }
   }
   // */
}

// The following method implements the "kmeans" action of this program
// for clustering the training textons to obtain the 100 universal
// textons.
void TextonSimulation::compute_universal_textons()
{
   LINFO("MVN: counting lines in %s", training_textons_file().c_str()) ;
   int num_rows = count_lines(training_textons_file()) ;

   LINFO("MVN: reading %d training textons from %s",
         num_rows, training_textons_file().c_str()) ;
   OpenCVMatrix training_textons =
      load_training_textons(training_textons_file(), num_rows) ;

   const int K = TT_NUM_UNIVERSAL_TEXTONS ;
   LINFO("MVN: doing K-means on training textons to get %d clusters", K) ;
   OpenCVMatrix universal_textons = kmeans(K, training_textons) ;

   LINFO("MVN: K-means done; saving universal textons to %s",
         universal_textons_file().c_str()) ;
   save_universal_textons(universal_textons, universal_textons_file()) ;
}

// This method implements the "histogram" action of this program. Like
// the accumulate action, it implements a "main loop" for the simulation,
// evolving the different components with each iteration. But rather than
// dipping into the GistEstimatorTexton's processing pipeline, it loads
// the universal textons and then uses the GistEstimatorTexton to obtain
// the histogram for each of the training images. These histograms are
// saved to the training histograms database specified by the
// --histogram-file option.
void TextonSimulation::compute_training_histograms()
{
   ModelManagerStarter M(model_manager) ;

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   Texton U = load_universal_textons(universal_textons_file()) ;
   ge->setUniversalTextons(& U) ;
   LINFO("MVN: loaded %d universal textons from %s",
         U.getHeight(), universal_textons_file().c_str()) ;

   int i = 1 ;
   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ;
         SeC<SimEventGistOutput> gist_out =
            event_queue->check<SimEventGistOutput>(brain.get(),
                                                   SEQ_UNMARKED | SEQ_MARK,
                                                   ge.get()) ;
         if (gist_out) // texton GE has a gist vector waiting to be picked up
            save_histogram(ge->getGist(), histogram_name() + to_string(i++),
                           histogram_file()) ;
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL-induced abort
      }
   }
   // */
}

// The following method implements this program's "classify" action. It
// reads the training histograms database and the universal textons and
// then uses a chi-square measure to compute the closest match for the
// input image.
void TextonSimulation::classify_input_images()
{
   ModelManagerStarter M(model_manager) ;

   LFATAL("sorry, this gist program is broken and needs to be fixed") ;
   /*
   nub::soft_ref<GistEstimatorTexton> ge =
      dynCastWeak<GistEstimatorTexton>(brain->getGE()) ;
   if (ge.isInvalid())
      throw std::runtime_error("can only use GistEstimatorTexton") ;

   Texton U = load_universal_textons(universal_textons_file()) ;
   ge->setUniversalTextons(& U) ;
   LINFO("MVN: loaded %d universal textons from %s",
         U.getHeight(), universal_textons_file().c_str()) ;

   HistogramMap training_histograms =
      load_training_histograms(histogram_file()) ;
   LINFO("MVN: loaded %d training histograms from %s",
         int(training_histograms.size()), histogram_file().c_str()) ;

   int i = 1 ;
   nub::ref<SimEventQueue> event_queue = configurator->getQ() ;
   for(;;)
   {
      try
      {
         input_frame_series->evolve(*event_queue) ;
         brain->evolve(*event_queue) ;
         SeC<SimEventGistOutput> gist_out =
            event_queue->check<SimEventGistOutput>(brain.get(),
                                                   SEQ_UNMARKED | SEQ_MARK,
                                                   ge.get()) ;
         if (gist_out) // texton GE has a gist vector waiting to be picked up
            classify_image(std::make_pair(histogram_name() + to_string(i++),
                                          ge->getGist()),
                           training_histograms, results_file()) ;
         if (event_queue->evolve() != SIM_CONTINUE)
            break ;
      }
      catch (lfatal_exception&) // if we seek beyond end of frame series
      {
         return ; // prevent LFATAL-induced abort
      }
   }
   // */
}

// Do we really not have to delete the configurator, brain and input
// frame series? If it turns out we do, this empty destructor will have
// to be filled out with the necessary delete calls...
TextonSimulation::~TextonSimulation() {}

} // end of local namespace encapsulating simulation encapsulation section

//------------------------------- MAIN ----------------------------------

#ifdef HAVE_OPENCV

int main(int argc, const char* argv[])
{
   MYLOGVERB = LOG_INFO ; // suppress debug messages
   try
   {
      TextonSimulation S("train-texton Model") ;
      S.parse_command_line(argc, argv) ;
      S.run() ;
   }
   catch (std::exception& e)
   {
      LFATAL("%s", e.what()) ;
      return 1 ;
   }
   return 0 ;
}

#else

int main()
{
   LINFO("Sorry, this program needs OpenCV.") ;
   return 1 ;
}

#endif

//-----------------------------------------------------------------------

/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* indent-tabs-mode: nil */
/* End: */