text2search.C

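/*!@file AppPsycho/text2search.C Renders the question and answer-grid images for one text search trial.
   A text file with a set of movie clip names, questions and answers is given at the cmdline, together with the visual angle of a single character, the grid dimensions and the index of the trial to render.  In the experiment each trial consists of a movie clip (played in mplayer with audio), followed by a question and a regular grid of answers; this program only produces the still images for the selected trial (the question, the answer grid, and the answer grid with the correct answer highlighted) and does not play the clips itself.
*/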
00004 
00005 // //////////////////////////////////////////////////////////////////// //
00006 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00007 // University of Southern California (USC) and the iLab at USC.         //
00008 // See http://iLab.usc.edu for information about this project.          //
00009 // //////////////////////////////////////////////////////////////////// //
00010 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00011 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00012 // in Visual Environments, and Applications'' by Christof Koch and      //
00013 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00014 // pending; application number 09/912,225 filed July 23, 2001; see      //
00015 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00016 // //////////////////////////////////////////////////////////////////// //
00017 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00018 //                                                                      //
00019 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00020 // redistribute it and/or modify it under the terms of the GNU General  //
00021 // Public License as published by the Free Software Foundation; either  //
00022 // version 2 of the License, or (at your option) any later version.     //
00023 //                                                                      //
00024 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00025 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00026 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00027 // PURPOSE.  See the GNU General Public License for more details.       //
00028 //                                                                      //
00029 // You should have received a copy of the GNU General Public License    //
00030 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00031 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00032 // Boston, MA 02111-1307 USA.                                           //
00033 // //////////////////////////////////////////////////////////////////// //
00034 //
00035 // Primary maintainer for this file: John Shen <shenjohn@usc.edu>
00036 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/AppPsycho/text2search.C $
00037 
00038 #include "Component/ModelManager.H"
00039 #include "Image/Image.H"
00040 #include "Image/DrawOps.H"
00041 #include "Image/SimpleFont.H"
00042 #include "Psycho/PsychoOpts.H"
00043 #include "Component/EventLog.H"
00044 #include "Component/ComponentOpts.H"
00045 #include "Util/Types.H"
00046 #include "Util/StringConversions.H"
00047 #include "Util/StringUtil.H"
00048 #include "Media/FrameSeries.H"
00049 #include "Transport/FrameInfo.H"
00050 
00051 #include <fstream>
00052 
//! Divisor used by submain() to turn a visual angle into pixels:
//! pixels = angle * display_width_in_pixels / HDEG.
//! Presumably the visual angle (in degrees) spanned by the full display
//! width -- TODO confirm against the experimental setup.
#define HDEG (54.9)
00054 
00055 typedef struct trial
00056 {
00057   std::string itsClip;
00058   std::string itsQuestion;
00059   std::vector<std::string> itsChoices;
00060   Image<PixRGB<byte> > itsQimage;
00061   Image<PixRGB<byte> > itsSimage;
00062   Image<PixRGB<byte> > itsAimage;
00063   int itsFamily;
00064   uint itsIAnswer;
00065 } SearchTrial;
00066 
00067 // ######################################################################
00068 int submain(const int argc, char** argv)
00069 {
00070   
00071   // ********************************************************************
00072   // *** This portion initializes all the components ********************
00073   // ********************************************************************
00074  
00075   MYLOGVERB = LOG_INFO;  // suppress debug messages
00076 
00077   // Instantiate a ModelManager:
00078   ModelManager manager("Text Search Array");
00079 
00080   // Instantiate an output frame series:
00081   nub::soft_ref<OutputFrameSeries> ofs(new OutputFrameSeries(manager));
00082   manager.addSubComponent(ofs);
00083 
00084   // Parse command-line:
00085   if (manager.parseCommandLine(argc, argv, 
00086                                "<textfile> visual-angle-of-single-character grid-rows grid-columns <index> <xml-output>", 
00087                                6,6)==false)
00088     return(1);
00089 
00090   // let's get all our ModelComponent instances started:
00091   manager.start();
00092 
00093   // create an image frame for each sentence in our text file and store
00094   // it in a vector before we start the experiment, then we can just
00095   // present each frame like in psycho still
00096 
00097   // First read the text file and all the sentences
00098   // load our file
00099   std::ifstream *itsFile;
00100   itsFile = new std::ifstream(manager.getExtraArg(0).c_str());
00101   
00102   //error if no file
00103   if (itsFile->is_open() == false)
00104     LFATAL("Cannot open '%s' for reading",manager.getExtraArg(0).c_str());
00105   
00106   //some storage variables
00107   std::string line;
00108   std::string clipstem = "";
00109   std::vector<SearchTrial> expt(100);
00110   uint num_trials = 0, num_stems = 0;
00111   std::vector<uint> curr_stem_index;
00112   
00113   //loop through lines of file
00114   while (!itsFile->eof())
00115     {
00116       getline(*itsFile, line, '\n');
00117                                                 
00118       //store the sentence and type (question or statement)
00119         if (line[0] == '>')//video
00120         {
00121           line.erase(0,1);
00122           expt[num_trials].itsClip = line;
00123 
00124           //clip filename has format <stem>[a-z].avi
00125           if(line.compare(0,line.size()-5,clipstem) != 0) //new stem
00126             {
00127               num_stems++;
00128               clipstem = line.substr(0,line.size()-5);
00129               curr_stem_index.push_back(num_trials);
00130             }
00131           expt[num_trials].itsFamily = num_stems;
00132           num_trials++;
00133         }
00134         else if (line[0] == '#') //question, always one line
00135         {
00136           line.erase(0,1);
00137           expt[num_trials-1].itsQuestion = line;
00138         }
00139         else if (line[0] == '!') //choice, first choice
00140         {
00141           if(line[1] == '$') //also correct choice
00142             {
00143               line.erase(0,1); 
00144               expt[num_trials-1].itsIAnswer = 0;
00145             }
00146           line.erase(0,1);
00147           expt[num_trials-1].itsChoices.push_back(line);
00148         }
00149         else if (line[0] == '$') //correct choice
00150           {
00151             line.erase(0,1);
00152              expt[num_trials-1].itsIAnswer = expt[num_trials-1].itsChoices.size();
00153              expt[num_trials-1].itsChoices.push_back(line);
00154         }            
00155         else if (line[0] == '&')//sub for a carriage return
00156         {
00157           //not handled yet
00158         }
00159         else //choice, subsequent choices
00160         {
00161           expt[num_trials-1].itsChoices.push_back(line);
00162         }        
00163     }
00164   itsFile->close();
00165 
00166   //now we have stored all of our sentences, lets create our search images
00167   int w = 1920;//width and height of SDL surface
00168   int h = 1080;
00169 
00170   double fontsize = fromStr<double>(manager.getExtraArg(1));
00171   uint fontwidth = uint(fontsize * w / HDEG);
00172   SimpleFont fnt = SimpleFont::fixedMaxWidth(fontwidth); //font
00173   
00174   //store a grid of equally spaced coordinates in a gridrows x gridcols grid;
00175   const uint gridrows = fromStr<uint>(manager.getExtraArg(2));
00176   const uint gridcols = fromStr<uint>(manager.getExtraArg(3));
00177   const uint gridslots = gridrows*gridcols;
00178   std::vector<int> x_coords(gridslots);
00179   std::vector<int> y_coords(gridslots);
00180   for (uint i = 0; i < gridrows; i++)
00181     {
00182     for(uint j = 0; j < gridcols; j++)
00183       {
00184         x_coords[gridcols*i+j] = (int( double(w*(j+1)) / (gridcols+1)));
00185         y_coords[gridcols*i+j] = (int( double(h*(i+1)) / (gridrows+1)));
00186      }
00187     }
00188 
00189   Point2D<int> tanchor;
00190   
00191   for (uint i = 0; i < num_trials; i++)
00192   {
00193     int space = 0;
00194     int hanchor = int(h/2) - int(fnt.h()/2); //center character half a height behind
00195     expt[i].itsQimage.resize(w,h);
00196     PixRGB<byte> gr(128,128,128);
00197     expt[i].itsQimage.clear(gr);
00198     
00199     space = int( double(w - fnt.w() * expt[i].itsQuestion.size()) / 2.0 );
00200     tanchor = Point2D<int>(space, hanchor);
00201       
00202     writeText(expt[i].itsQimage,tanchor,expt[i].itsQuestion.c_str(),
00203                     PixRGB<byte>(0,0,0),
00204                     gr,
00205                     fnt);       
00206     
00207     expt[i].itsSimage.resize(w,h);
00208     expt[i].itsSimage.clear(gr);
00209     expt[i].itsAimage.resize(w,h);
00210     expt[i].itsAimage.clear(gr);
00211 
00212     for (uint j = 0; j < expt[i].itsChoices.size(); j++)
00213     {
00214       //place each choice in its place
00215       if(j >= gridslots) //if there are too many choices
00216       {
00217         LDEBUG("Trial %d, clip %s: Too many answer choices for the grid", i, expt[i].itsClip.c_str());
00218         break;
00219       }
00220       space = x_coords[j] - int( double(fnt.w() * expt[i].itsChoices[j].length()) / 2.0);
00221       hanchor = y_coords[j] - int(fnt.h()/2);
00222       tanchor = Point2D<int>(space, hanchor);
00223       
00224       Point2D<int> center(x_coords[j],y_coords[j]);
00225 
00226       //write to search image
00227       writeText(expt[i].itsSimage,tanchor,expt[i].itsChoices[j].c_str(),
00228                     PixRGB<byte>(0,0,0),
00229                     gr,
00230                     fnt);       
00231 
00232       //write to answer image
00233       if(j == expt[i].itsIAnswer)
00234         {
00235           PixRGB<byte> fill(64,192,64);
00236           const Dims rectsize(w/(gridcols+1),h/(gridrows+1));
00237           Point2D<int> rSize(w/(gridcols+1),h/(gridrows+1));
00238           Point2D<int> corner = center-rSize/2;
00239           const Rectangle correctRect(corner,rectsize);
00240           //const int radius = int(1.5*HDEG);
00241           
00242           drawFilledRect(expt[i].itsAimage,correctRect,fill);
00243           //drawDisk(expt[i].itsAimage,center,radius, fill);
00244           writeText(expt[i].itsAimage,tanchor,expt[i].itsChoices[j].c_str(),
00245                   PixRGB<byte>(0,0,0), //invert colors
00246                   fill, fnt);
00247        
00248         }
00249         else
00250           writeText(expt[i].itsAimage,tanchor,expt[i].itsChoices[j].c_str(),
00251                     PixRGB<byte>(0,0,0),
00252                     gr, fnt);       
00253 
00254        
00255     }
00256   
00257     
00258   }
00259 
00260   const uint questionNum = fromStr<uint>(manager.getExtraArg(4));
00261 
00262   //check for void OFS
00263   if (ofs->becameVoid())
00264   {
00265      LINFO("quitting because output stream was closed or became void");
00266      return 0;
00267   }
00268  
00269   //update ofs
00270   // FrameState os = 
00271     ofs->updateNext();
00272       
00273   //write out image
00274   ofs->writeRGB(expt[questionNum].itsQimage, "output", 
00275                 FrameInfo("Text embedded image",SRC_POS));
00276   
00277   //os = 
00278     ofs->updateNext();
00279   ofs->writeRGB(expt[questionNum].itsSimage, "output",
00280                 FrameInfo("Searchtext embedded image", SRC_POS));
00281 
00282   // os = 
00283     ofs->updateNext();
00284   ofs->writeRGB(expt[questionNum].itsAimage, "output",
00285                 FrameInfo("Search embedded image with answer", SRC_POS));
00286 
00287   // stop all our ModelComponents
00288   manager.stop();
00289 
00290   // all done!
00291   return 0;
00292 }
00293 
00294 extern "C" int main(const int argc, char** argv)
00295 {
00296   // simple wrapper around submain() to catch exceptions (because we
00297   // want to allow PsychoDisplay to shut down cleanly; otherwise if we
00298   // abort while SDL is in fullscreen mode, the X server won't return
00299   // to its original resolution)
00300   try
00301     {
00302       return submain(argc, argv);
00303     }
00304   catch (...)
00305     {
00306       REPORT_CURRENT_EXCEPTION;
00307     }
00308 
00309   return 1;
00310 }
00311 
00312 // ######################################################################
00313 /* So things look consistent in everyone's emacs... */
00314 /* Local Variables: */
00315 /* indent-tabs-mode: nil */
00316 /* End: */