InferoTemporal.C

Go to the documentation of this file.
00001 /*!@file Neuro/InferoTemporal.C Object recognition module */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/InferoTemporal.C $
00035 // $Id: InferoTemporal.C 14390 2011-01-13 20:17:22Z pez $
00036 //
00037 
#include "Neuro/InferoTemporal.H"

#include "Component/OptionManager.H"
#include "Image/CutPaste.H"
#include "Neuro/NeuroOpts.H"
#include "Neuro/NeuroSimEvents.H"
#include "Neuro/Brain.H"
#include "Neuro/VisualCortex.H"
#include "SIFT/VisualObjectDB.H"
#include "SIFT/VisualObject.H"

#include <cstdlib>
#include <iostream>

#include <unistd.h> // for close()
00051 
00052 // ######################################################################
00053 namespace
00054 {
00055   Image<PixRGB<byte> > getCroppedObject(const Image<PixRGB<byte> >& scene,
00056                                         const Image<float>& smoothMask)
00057   {
00058     if (!scene.initialized())
00059       return Image<PixRGB<byte> >();
00060 
00061     if (!smoothMask.initialized())
00062       return Image<PixRGB<byte> >();
00063 
00064     const float threshold = 1.0f;
00065 
00066     const Rectangle r = findBoundingRect(smoothMask, threshold);
00067     return crop(scene, r);
00068   }
00069 }
00070 
00071 // ######################################################################
00072 InferoTemporal::InferoTemporal(OptionManager& mgr,
00073                                const std::string& descrName,
00074                                const std::string& tagName) :
00075   SimModule(mgr, descrName, tagName),
00076   SIMCALLBACK_INIT(SimEventWTAwinner)
00077 { }
00078 
00079 // ######################################################################
00080 InferoTemporal::~InferoTemporal()
00081 { }
00082 
00083 // ######################################################################
00084 void InferoTemporal::
00085 onSimEventWTAwinner(SimEventQueue& q, rutz::shared_ptr<SimEventWTAwinner>& e)
00086 {
00087   this->attentionShift(q, e->winner().p);
00088 }
00089 
00090 // ######################################################################
00091 InferoTemporalStub::InferoTemporalStub(OptionManager& mgr,
00092                                        const std::string& descrName,
00093                                        const std::string& tagName)
00094   :
00095   InferoTemporal(mgr, descrName, tagName)
00096 {}
00097 
00098 // ######################################################################
00099 InferoTemporalStub::~InferoTemporalStub()
00100 {}
00101 
00102 // ######################################################################
00103 void InferoTemporalStub::attentionShift(SimEventQueue& q,
00104                                         const Point2D<int>& location)
00105 {}
00106 
00107 // ######################################################################
00108 InferoTemporalStd::InferoTemporalStd(OptionManager& mgr,
00109                                      const std::string& descrName,
00110                                      const std::string& tagName) :
00111   InferoTemporal(mgr, descrName, tagName),
00112   itsUseAttention(&OPT_AttentionObjRecog, this),
00113   itsObjectDatabaseFile(&OPT_ObjectDatabaseFileName, this),
00114   itsTrainObjectDB(&OPT_TrainObjectDB, this),
00115   itsPromptUserTrainDB(&OPT_PromptUserTrainDB, this),
00116   itsMatchObjects(&OPT_MatchObjects, this),
00117   itsRecogMinMatch(&OPT_RecognitionMinMatch, this),
00118   itsMatchingAlg(&OPT_MatchingAlgorithm, this),
00119   itsObjectDB(new VisualObjectDB())
00120 { }
00121 
00122 // ######################################################################
00123 void InferoTemporalStd::start1()
00124 {
00125   // if no filename given for our object DB, start empty, otherwise load it:
00126   if (itsObjectDatabaseFile.getVal().empty())
00127     LINFO("Starting with empty VisualObjectDB.");
00128   else
00129     itsObjectDB->loadFrom(itsObjectDatabaseFile.getVal());
00130 
00131   InferoTemporal::start1();
00132 }
00133 
00134 // ######################################################################
00135 void InferoTemporalStd::stop1()
00136 {
00137   // save database if we have a filename for it:
00138   if (itsObjectDatabaseFile.getVal().empty() == false)
00139     itsObjectDB->saveTo(itsObjectDatabaseFile.getVal());
00140 }
00141 
00142 // ######################################################################
00143 InferoTemporalStd::~InferoTemporalStd()
00144 {}
00145 
00146 // ######################################################################
00147 void InferoTemporalStd::attentionShift(SimEventQueue& q,
00148                                        const Point2D<int>& location)
00149 {
00150   Image<PixRGB<float> > objImg;
00151 
00152   // get the lastest input frame from the retina:
00153   if (SeC<SimEventRetinaImage> e = q.check<SimEventRetinaImage>(this))
00154     objImg = e->frame().colorByte();
00155   else
00156     LFATAL("Oooops, no input frame in the event queue?");
00157 
00158   // get the latest smooth mask from the shape estimator:
00159   Image<float> smoothMask;
00160   if (SeC<SimEventShapeEstimatorOutput>
00161       e = q.check<SimEventShapeEstimatorOutput>(this))
00162     smoothMask = e->smoothMask();
00163 
00164   // crop around object using mask?
00165   if (itsUseAttention.getVal())
00166     objImg = getCroppedObject(objImg, smoothMask);
00167 
00168   if (!objImg.initialized()) return; // no object image, so just do nothing
00169 
00170   rutz::shared_ptr<SimReqVCXfeatures> ef(new SimReqVCXfeatures(this, location));
00171   q.request(ef); // VisualCortex is now filling-in the features into ef->features()
00172 
00173   // create a new VisualObject. Since we give it no keypoints, they
00174   // will be automatically computed:
00175   rutz::shared_ptr<VisualObject>
00176     obj(new VisualObject("NewObject", "NewObject", objImg, location, ef->features()));
00177 
00178   // Try to match this to the objects in our database:
00179   if (itsMatchObjects.getVal())
00180     {
00181       // we need to have at least 3 keypoints to consider this a
00182       // serious object candidate:
00183       if (obj->numKeypoints() < 3)
00184         { LINFO("Not enough Keypoints -- NO RECOGNITION"); return; }
00185 
00186       LINFO("Attempting object recognition...");
00187       std::vector< rutz::shared_ptr<VisualObjectMatch> > matches;
00188 
00189       const uint nm =
00190         itsObjectDB->getObjectMatches(obj, matches, VOMA_KDTREEBBF,
00191                                       100U, 0.5F, 0.5F, 1.0F,
00192                                       uint(itsRecogMinMatch.getVal()),
00193                                       6U, false);
00194       // If an object was identified
00195       if (nm > 0)
00196         {
00197           LINFO("***** %u object recognition match(es) *****", nm);
00198           for (uint i = 0 ; i < nm; i ++)
00199             LINFO("   Match with '%s' [score = %f]",
00200                   matches[i]->getVoTest()->getName().c_str(),
00201                   matches[i]->getScore());
00202         }
00203       else
00204         LINFO("***** Could not identify attended object! *****");
00205     }
00206 
00207   // do we want to train the database?
00208   if (itsTrainObjectDB.getVal())
00209     {
00210       std::string objname;
00211 
00212       // if interactive, ask the user for a name:
00213       if (itsPromptUserTrainDB.getVal())
00214         {
00215           LINFO("Enter name for new object or [RETURN] to skip training:");
00216           std::getline(std::cin, objname, '\n');
00217         }
00218       else
00219         {
00220           // get a unique random name for the object:
00221           char tmpn[14]; strcpy(tmpn, "Object-XXXXXX");
00222           if(mkstemp(tmpn) == -1)
00223             LFATAL("mkstemp failed");
00224           objname = tmpn;
00225         }
00226 
00227       // train the database:
00228       if (objname.length() > 0)
00229         {
00230           LINFO("Adding new object '%s' to database.", objname.c_str());
00231           obj->setName(objname);
00232           obj->setImageFname(objname + ".png");
00233           itsObjectDB->addObject(obj);
00234         }
00235 
00236     }
00237 }
00238 
00239 // ######################################################################
00240 /* So things look consistent in everyone's emacs... */
00241 /* Local Variables: */
00242 /* indent-tabs-mode: nil */
00243 /* End: */