00001 /*!@file AppPsycho/psycho-mplayertextsearch.C Movie to text search sequence. 00002 A set of movie clips and a text file with a set of questions and answers are given at the cmdline. Each trial consists of a movie clip (played in mplayer with audio), followed by a question and a regular grid of answers. 00003 */ 00004 00005 // //////////////////////////////////////////////////////////////////// // 00006 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00007 // University of Southern California (USC) and the iLab at USC. // 00008 // See http://iLab.usc.edu for information about this project. // 00009 // //////////////////////////////////////////////////////////////////// // 00010 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00011 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00012 // in Visual Environments, and Applications'' by Christof Koch and // 00013 // Laurent Itti, California Institute of Technology, 2001 (patent // 00014 // pending; application number 09/912,225 filed July 23, 2001; see // 00015 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00016 // //////////////////////////////////////////////////////////////////// // 00017 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00018 // // 00019 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00020 // redistribute it and/or modify it under the terms of the GNU General // 00021 // Public License as published by the Free Software Foundation; either // 00022 // version 2 of the License, or (at your option) any later version. // 00023 // // 00024 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00025 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00026 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00027 // PURPOSE. See the GNU General Public License for more details. // 00028 // // 00029 // You should have received a copy of the GNU General Public License // 00030 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00031 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00032 // Boston, MA 02111-1307 USA. // 00033 // //////////////////////////////////////////////////////////////////// // 00034 // 00035 // Primary maintainer for this file: John Shen <shenjohn@usc.edu> 00036 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/AppPsycho/text2search.C $ 00037 00038 #include "Component/ModelManager.H" 00039 #include "Image/Image.H" 00040 #include "Image/DrawOps.H" 00041 #include "Image/SimpleFont.H" 00042 #include "Psycho/PsychoOpts.H" 00043 #include "Component/EventLog.H" 00044 #include "Component/ComponentOpts.H" 00045 #include "Util/Types.H" 00046 #include "Util/StringConversions.H" 00047 #include "Util/StringUtil.H" 00048 #include "Media/FrameSeries.H" 00049 #include "Transport/FrameInfo.H" 00050 00051 #include <fstream> 00052 00053 #define HDEG 54.9 00054 00055 typedef struct trial 00056 { 00057 std::string itsClip; 00058 std::string itsQuestion; 00059 std::vector<std::string> itsChoices; 00060 Image<PixRGB<byte> > itsQimage; 00061 Image<PixRGB<byte> > itsSimage; 00062 Image<PixRGB<byte> > itsAimage; 00063 int itsFamily; 00064 uint itsIAnswer; 00065 } SearchTrial; 00066 00067 // ###################################################################### 00068 int submain(const int argc, char** argv) 00069 { 00070 00071 // ******************************************************************** 00072 // *** This portion initializes all the components ******************** 00073 // ******************************************************************** 00074 00075 MYLOGVERB = LOG_INFO; // suppress debug messages 00076 00077 // Instantiate a ModelManager: 00078 ModelManager manager("Text Search Array"); 00079 00080 // Instantiate an output frame series: 00081 nub::soft_ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager)); 00082 manager.addSubComponent(ofs); 00083 00084 // Parse command-line: 00085 if (manager.parseCommandLine(argc, argv, 00086 "<textfile> visual-angle-of-single-character grid-rows grid-columns <index> <xml-output>", 00087 6,6)==false) 00088 return(1); 00089 00090 // let's get all our ModelComponent instances started: 00091 manager.start(); 00092 00093 // create an image frame for each sentence in our text file and store 00094 // it in a vector before we start the experiment, then we can just 00095 // present each frame like in psycho still 00096 00097 // First read the text file and all the sentences 00098 // load our file 00099 std::ifstream *itsFile; 00100 itsFile = new std::ifstream(manager.getExtraArg(0).c_str()); 00101 00102 //error if no file 00103 if (itsFile->is_open() == false) 00104 LFATAL("Cannot open '%s' for reading",manager.getExtraArg(0).c_str()); 00105 00106 //some storage variables 00107 std::string line; 00108 std::string clipstem = ""; 00109 std::vector<SearchTrial> expt(100); 00110 uint num_trials = 0, num_stems = 0; 00111 std::vector<uint> curr_stem_index; 00112 00113 //loop through lines of file 00114 while (!itsFile->eof()) 00115 { 00116 getline(*itsFile, line, '\n'); 00117 00118 //store the sentence and type (question or statement) 00119 if (line[0] == '>')//video 00120 { 00121 line.erase(0,1); 00122 expt[num_trials].itsClip = line; 00123 00124 //clip filename has format <stem>[a-z].avi 00125 if(line.compare(0,line.size()-5,clipstem) != 0) //new stem 00126 { 00127 num_stems++; 00128 clipstem = line.substr(0,line.size()-5); 00129 curr_stem_index.push_back(num_trials); 00130 } 00131 expt[num_trials].itsFamily = num_stems; 00132 num_trials++; 00133 } 00134 else if (line[0] == '#') //question, always one line 00135 { 00136 line.erase(0,1); 00137 expt[num_trials-1].itsQuestion = line; 00138 } 00139 else if (line[0] == '!') //choice, first choice 00140 { 00141 if(line[1] == '$') //also correct choice 00142 { 00143 line.erase(0,1); 00144 expt[num_trials-1].itsIAnswer = 0; 00145 } 00146 line.erase(0,1); 00147 expt[num_trials-1].itsChoices.push_back(line); 00148 } 00149 else if (line[0] == '$') //correct choice 00150 { 00151 line.erase(0,1); 00152 expt[num_trials-1].itsIAnswer = expt[num_trials-1].itsChoices.size(); 00153 expt[num_trials-1].itsChoices.push_back(line); 00154 } 00155 else if (line[0] == '&')//sub for a carriage return 00156 { 00157 //not handled yet 00158 } 00159 else //choice, subsequent choices 00160 { 00161 expt[num_trials-1].itsChoices.push_back(line); 00162 } 00163 } 00164 itsFile->close(); 00165 00166 //now we have stored all of our sentences, lets create our search images 00167 int w = 1920;//width and height of SDL surface 00168 int h = 1080; 00169 00170 double fontsize = fromStr<double>(manager.getExtraArg(1)); 00171 uint fontwidth = uint(fontsize * w / HDEG); 00172 SimpleFont fnt = SimpleFont::fixedMaxWidth(fontwidth); //font 00173 00174 //store a grid of equally spaced coordinates in a gridrows x gridcols grid; 00175 const uint gridrows = fromStr<uint>(manager.getExtraArg(2)); 00176 const uint gridcols = fromStr<uint>(manager.getExtraArg(3)); 00177 const uint gridslots = gridrows*gridcols; 00178 std::vector<int> x_coords(gridslots); 00179 std::vector<int> y_coords(gridslots); 00180 for (uint i = 0; i < gridrows; i++) 00181 { 00182 for(uint j = 0; j < gridcols; j++) 00183 { 00184 x_coords[gridcols*i+j] = (int( double(w*(j+1)) / (gridcols+1))); 00185 y_coords[gridcols*i+j] = (int( double(h*(i+1)) / (gridrows+1))); 00186 } 00187 } 00188 00189 Point2D<int> tanchor; 00190 00191 for (uint i = 0; i < num_trials; i++) 00192 { 00193 int space = 0; 00194 int hanchor = int(h/2) - int(fnt.h()/2); //center character half a height behind 00195 expt[i].itsQimage.resize(w,h); 00196 PixRGB<byte> gr(128,128,128); 00197 expt[i].itsQimage.clear(gr); 00198 00199 space = int( double(w - fnt.w() * expt[i].itsQuestion.size()) / 2.0 ); 00200 tanchor = Point2D<int>(space, hanchor); 00201 00202 writeText(expt[i].itsQimage,tanchor,expt[i].itsQuestion.c_str(), 00203 PixRGB<byte>(0,0,0), 00204 gr, 00205 fnt); 00206 00207 expt[i].itsSimage.resize(w,h); 00208 expt[i].itsSimage.clear(gr); 00209 expt[i].itsAimage.resize(w,h); 00210 expt[i].itsAimage.clear(gr); 00211 00212 for (uint j = 0; j < expt[i].itsChoices.size(); j++) 00213 { 00214 //place each choice in its place 00215 if(j >= gridslots) //if there are too many choices 00216 { 00217 LDEBUG("Trial %d, clip %s: Too many answer choices for the grid", i, expt[i].itsClip.c_str()); 00218 break; 00219 } 00220 space = x_coords[j] - int( double(fnt.w() * expt[i].itsChoices[j].length()) / 2.0); 00221 hanchor = y_coords[j] - int(fnt.h()/2); 00222 tanchor = Point2D<int>(space, hanchor); 00223 00224 Point2D<int> center(x_coords[j],y_coords[j]); 00225 00226 //write to search image 00227 writeText(expt[i].itsSimage,tanchor,expt[i].itsChoices[j].c_str(), 00228 PixRGB<byte>(0,0,0), 00229 gr, 00230 fnt); 00231 00232 //write to answer image 00233 if(j == expt[i].itsIAnswer) 00234 { 00235 PixRGB<byte> fill(64,192,64); 00236 const Dims rectsize(w/(gridcols+1),h/(gridrows+1)); 00237 Point2D<int> rSize(w/(gridcols+1),h/(gridrows+1)); 00238 Point2D<int> corner = center-rSize/2; 00239 const Rectangle correctRect(corner,rectsize); 00240 //const int radius = int(1.5*HDEG); 00241 00242 drawFilledRect(expt[i].itsAimage,correctRect,fill); 00243 //drawDisk(expt[i].itsAimage,center,radius, fill); 00244 writeText(expt[i].itsAimage,tanchor,expt[i].itsChoices[j].c_str(), 00245 PixRGB<byte>(0,0,0), //invert colors 00246 fill, fnt); 00247 00248 } 00249 else 00250 writeText(expt[i].itsAimage,tanchor,expt[i].itsChoices[j].c_str(), 00251 PixRGB<byte>(0,0,0), 00252 gr, fnt); 00253 00254 00255 } 00256 00257 00258 } 00259 00260 const uint questionNum = fromStr<uint>(manager.getExtraArg(4)); 00261 00262 //check for void OFS 00263 if (ofs->becameVoid()) 00264 { 00265 LINFO("quitting because output stream was closed or became void"); 00266 return 0; 00267 } 00268 00269 //update ofs 00270 // FrameState os = 00271 ofs->updateNext(); 00272 00273 //write out image 00274 ofs->writeRGB(expt[questionNum].itsQimage, "output", 00275 FrameInfo("Text embedded image",SRC_POS)); 00276 00277 //os = 00278 ofs->updateNext(); 00279 ofs->writeRGB(expt[questionNum].itsSimage, "output", 00280 FrameInfo("Searchtext embedded image", SRC_POS)); 00281 00282 // os = 00283 ofs->updateNext(); 00284 ofs->writeRGB(expt[questionNum].itsAimage, "output", 00285 FrameInfo("Search embedded image with answer", SRC_POS)); 00286 00287 // stop all our ModelComponents 00288 manager.stop(); 00289 00290 // all done! 00291 return 0; 00292 } 00293 00294 extern "C" int main(const int argc, char** argv) 00295 { 00296 // simple wrapper around submain() to catch exceptions (because we 00297 // want to allow PsychoDisplay to shut down cleanly; otherwise if we 00298 // abort while SDL is in fullscreen mode, the X server won't return 00299 // to its original resolution) 00300 try 00301 { 00302 return submain(argc, argv); 00303 } 00304 catch (...) 00305 { 00306 REPORT_CURRENT_EXCEPTION; 00307 } 00308 00309 return 1; 00310 } 00311 00312 // ###################################################################### 00313 /* So things look consistent in everyone's emacs... */ 00314 /* Local Variables: */ 00315 /* indent-tabs-mode: nil */ 00316 /* End: */