NeoBrain.H

00001 /*!@file Neuro/NeoBrain.H NeoBrain class for the VSS demo */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/NeoBrain.H $
00035 // $Id: NeoBrain.H 13716 2010-07-28 22:07:03Z itti $
00036 //
00037 
00038 #ifndef NEURO_NEOBRAIN_H_DEFINED
00039 #define NEURO_NEOBRAIN_H_DEFINED
00040 
00041 #include "Image/OpenCVUtil.H"  // must be first to avoid conflicting defs of int64, uint64
00042 
00043 #include "Component/ModelComponent.H"
00044 #include "Component/ModelParam.H"
00045 #include "Devices/BeoHead.H"
00046 #include "Devices/SpeechSynth.H"
00047 #include "Image/ColorOps.H"
00048 #include "Image/DrawOps.H"
00049 #include "Image/CutPaste.H"
00050 #include "Image/Image.H"
00051 #include "Image/Pixels.H"
00052 #include "Image/OpenCVUtil.H"
00053 #include "Component/ParamMap.H"
00054 #include "Util/sformat.H"
00055 #include "rutz/mutex.h"
00056 #include "Util/WorkThreadServer.H"
00057 #include "Util/JobWithSemaphore.H"
00058 
00059 #include <pthread.h>
00060 #include <string>
00061 #include <vector>
00062 #include <map>
00063 
00064 // Define VSS_DEMO so that neovision2.C includes some VSS-demo-specific functions
00065 #define VSS_DEMO 1
00066 
00067 class NeoBrain : public ModelComponent
00068 {
00069 public:
00070   enum State {CHECK_TARGET, TRACK_TARGET};
00071 
00072   //! Constructor
00073   NeoBrain(OptionManager& mgr,
00074            const std::string& descrName = "NeoBrain",
00075            const std::string& tagName = "NeoBrain");
00076 
00077   //! virtual destructor for safe inheritance
00078   virtual ~NeoBrain();
00079 
00080   void init(Dims imageDims, int nPoints = 1, int wz = 10);
00081 
00082   //tracking
00083   void setTarget(Point2D<int> loc, const Image<byte>& grey,
00084       int saliencyval=-1, bool changeState = false, bool forceNewLocation = false);
00085   void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb,
00086       int saliencyval=-1, bool changeState = false, bool forceNewLocation = false)
00087   {
00088     if (itsAllowTracking.getVal())
00089       this->setTarget(loc, luminance(rgb), saliencyval, changeState, forceNewLocation);
00090   }
00091   Point2D<int> trackObject(const Image<byte>& grey);
00092   std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey);
00093   Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb)
00094   {
00095     if (itsAllowTracking.getVal())
00096       return this->trackObject(luminance(rgb));
00097     else return Point2D<int>(-1,-1);
00098   }
00099 
00100 
00101 
00102   void saySaliencyVal(byte val);
00103 
00104   void moveHeadToTarget();
00105 
00106   void updateBoringness(const Image<byte>& salmap, byte foaval);
00107 
00108   void updateExcitement(double vcxflicker);
00109 
00110   float getBoringness() const;
00111 
00112   float getExcitementLevel() const;
00113   float getSleepLevel() const;
00114 
00115   bool sayText(const std::string& text, int priority, bool block = false) const;
00116 
00117   std::string getToken(const std::string& token, int val) const;
00118 
00119   bool sayToken(const std::string& token, int val, int priority) const;
00120 
00121   bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false);
00122 
00123   struct Stats
00124   {
00125     unsigned long bigerrframes, bigerrframes_thresh;
00126     unsigned long targetframes, targetframes_thresh;
00127     unsigned long nomoveframes, nomoveframes_thresh;
00128     unsigned long stopframes, stopframes_thresh;
00129     float last_err, last_xerr, last_yerr, err_tol, dist_tol;
00130     float leftEyePanPos, leftEyeTiltPos;
00131     float rightEyePanPos, rightEyeTiltPos;
00132     float headPanPos, headTiltPos, headYawPos;
00133   };
00134 
00135   Stats getStats() const { return itsStats; }
00136 
00137   Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims,
00138                                         const PixRGB<byte>& back,
00139                                         const PixRGB<byte>& fore)
00140   {
00141     if (!itsSpeakSaliency.getVal())
00142       {
00143         Image<PixRGB<byte> > result(dims, NO_INIT);
00144         result.clear(back);
00145         return result;
00146       }
00147 
00148     TokenType ttype;
00149     {
00150       GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex);
00151       TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target");
00152       if (itr == itsSpeechTokenMap.end())
00153         LERROR("couldn't find new_target token type");
00154       else
00155         ttype = (*itr).second;
00156     }
00157     return ttype.makeTokenHistogram(dims, back, fore);
00158   }
00159 
00160   //for setting model params
00161 
00162  bool getUseHead() {return itsUseHead.getVal(); }
00163  bool getRelaxNeck() {return itsRelaxNeck.getVal(); }
00164  int  getBoringnessThresh() {return itsBoringnessThresh.getVal(); }
00165  int  getErrTolerance() {return itsErrTolerance.getVal(); }
00166  int  getDistTolerance() {return itsDistTolerance.getVal(); }
00167 
00168  long getTrackDelayFrames()     { return itsTrackDelayFrames.getVal(); }
00169  long getBigErrFramesThresh()   { return itsBigErrFramesThresh.getVal(); }
00170  long getTargetFramesThresh()   { return itsTargetFramesThresh.getVal(); }
00171  long getNoMoveFramesThresh()   { return itsNoMoveFramesThresh.getVal(); }
00172  long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); }
00173 
00174 
00175  void gotoSleep();
00176  void wakeUp();
00177  bool isSleeping() { return itsSleeping.getVal();}
00178 
00179  void setUseHead(bool val) { itsUseHead.setVal(val); }
00180  void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); }
00181  void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); }
00182  void setErrTolerance(int val) { itsErrTolerance.setVal(val); }
00183  void setDistTolerance(int val) { itsDistTolerance.setVal(val); }
00184 
00185  void setTrackDelayFrames(long val)     {  itsTrackDelayFrames.setVal(val); }
00186  void setBigErrFramesThresh(long val)   {  itsBigErrFramesThresh.setVal(val); }
00187  void setTargetFramesThresh(long val)   {  itsTargetFramesThresh.setVal(val); }
00188  void setNoMoveFramesThresh(long val)   {  itsNoMoveFramesThresh.setVal(val); }
00189  void setStopTrackDelayFrames(long val) {  itsStopTrackDelayFrames.setVal(val); }
00190  void setKeepTracking(bool val) { itsKeepTracking.setVal(val); }
00191 
00192  void relaxHead(){itsBeoHead->relaxHead();}//stop all servo
00193 protected:
00194   virtual void start2();
00195 
00196   void paramChanged(ModelParamBase* const param,
00197                     const bool valueChanged,
00198                     ParamClient::ChangeStatus* status);
00199 
00200 private:
00201   void enterCheckTargetState();
00202 
00203   OModelParam<bool> itsAllowTracking;
00204   OModelParam<bool> itsKeepTracking; //Keep the tracking the target at much as posible
00205   NModelParam<bool> itsUseHead;
00206   NModelParam<bool> itsRelaxNeck;
00207   NModelParam<bool> itsSleeping;
00208   OModelParam<int> itsBoringnessThresh;
00209   NModelParam<unsigned int> itsErrTolerance;
00210   NModelParam<unsigned int> itsDistTolerance;
00211 
00212   OModelParam<unsigned long> itsTrackDelayFrames;
00213   NModelParam<unsigned long> itsBigErrFramesThresh;
00214   NModelParam<unsigned long> itsTargetFramesThresh;
00215   NModelParam<unsigned long> itsNoMoveFramesThresh;
00216   OModelParam<int> itsStopTrackDelayFrames;
00217 
00218   NModelParam<float> itsHeadInfoEyeTiltPos;
00219   NModelParam<float> itsHeadInfoEyePanPos;
00220   NModelParam<float> itsHeadInfoHeadPanPos;
00221 
00222   OModelParam<bool> itsSpeakSaliency;
00223   OModelParam<bool> itsSpeakObjects;
00224   OModelParam<std::string> itsSpeechFile;
00225 
00226   NModelParam<bool> itsRefreshSpeechFile;
00227 
00228   NModelParam<float> itsExcitementThresh;
00229 
00230   nub::soft_ref<BeoHead> itsBeoHead;
00231   nub::soft_ref<SpeechSynth> itsSpeechSynth;
00232 
00233   unsigned long itsTargetFrames;
00234   unsigned long itsBigErrFrames;
00235   unsigned long itsNoMoveFrames;
00236   unsigned long itsStopFrames;
00237   unsigned long itsHeadInfoFrames;
00238 
00239   int win_size;
00240 
00241 #ifdef HAVE_OPENCV
00242   int MAX_COUNT;
00243   int count;
00244   CvPoint2D32f* points[2], *swap_points;
00245   IplImage *pyramid;
00246   IplImage *prev_pyramid;
00247   Image<byte> prev_grey;
00248   char* status;
00249 #endif
00250   int flags;
00251   State itsState;
00252   Dims itsImageDims;
00253   bool itsTracking;
00254 
00255   float itsPrevTargetX;
00256   float itsPrevTargetY;
00257 
00258   float itsBoringness;
00259   int itsBoringCount;
00260   float itsExcitementLevel;
00261   float itsSleep;
00262   int itsPrepSleep;
00263 
00264   bool itsAlmostSinging;
00265 
00266   std::string itsLastSpokenLabel;
00267 
00268   struct SpeechToken
00269   {
00270     SpeechToken() : low(0), high(100), nspoken(0) {}
00271 
00272     std::vector<std::string> textList;
00273     int low;
00274     int high;
00275     mutable int nspoken;
00276   };
00277 
00278   struct TokenType
00279   {
00280     TokenType()
00281       :
00282       lastReturnedToken(0)
00283     {
00284       for (int i = 0; i < 256; ++i) this->histo[i] = 0;
00285     }
00286 
00287     std::string getTextItemForVal(int val) const
00288     {
00289       if (val < 0)
00290         LERROR("val too low (%d)", val);
00291       else if (val > 255)
00292         LERROR("val too high (%d)", val);
00293 
00294       this->histo[std::min(255, std::max(0,val))]++;
00295 
00296       int percentile=0, total=0;
00297 
00298       for (int i = 0; i < 256; ++i)
00299         {
00300           if (i <= val)
00301             percentile += this->histo[i];
00302           total += this->histo[i];
00303         }
00304 
00305       const int fpercentile = int(100.0 * double(percentile) / double(total));
00306 
00307       for (size_t i = 0; i < this->tokens.size(); ++i)
00308         {
00309           if (fpercentile >= this->tokens.at(i).low
00310               && fpercentile <= this->tokens.at(i).high)
00311             {
00312               int n = randomUpToNotIncluding
00313                 (this->tokens.at(i).textList.size());
00314 
00315               if (n == lastReturnedToken)
00316                 n = (n+1) % this->tokens.at(i).textList.size();
00317 
00318               if (n < 0)
00319                 { LERROR("n too low (%d)", n); n = 0; }
00320               else if (n >= int(this->tokens.at(i).textList.size()))
00321                 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); }
00322 
00323               lastReturnedToken = n;
00324 
00325               LDEBUG("fpercentile = %d; returning %s",
00326                      fpercentile, this->tokens.at(i).textList.at(n).c_str());
00327 
00328               this->tokens.at(i).nspoken++;
00329 
00330               return this->tokens.at(i).textList.at(n);
00331             }
00332         }
00333 
00334       return std::string();
00335     }
00336 
00337     Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims,
00338                                             const PixRGB<byte>& back,
00339                                             const PixRGB<byte>& fore)
00340     {
00341       Image<PixRGB<byte> > result(dims, NO_INIT);
00342       result.clear(back);
00343 
00344       int smallhisto[16] = { 0 };
00345       for (int i = 0; i < 256; ++i)
00346         smallhisto[(i * 16) / 256] += this->histo[i];
00347 
00348       int maxval = 1;
00349       for (size_t i = 0; i < 16; ++i)
00350           if (smallhisto[i] > maxval) maxval = smallhisto[i];
00351 
00352       const int top = std::max(1, dims.h() - 10);
00353       for (size_t i = 0; i < 16; ++i)
00354         {
00355           const int left = (i * dims.w()) / 16;
00356           const int right =
00357             std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1);
00358 
00359           for (int x = left; x <= right; ++x)
00360             {
00361               drawLine(result,
00362                        Point2D<int>(x, dims.h() - 1),
00363                        Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval),
00364                        fore, 1);
00365             }
00366         }
00367 
00368       return result;
00369     }
00370 
00371     std::vector<SpeechToken> tokens;
00372     mutable int histo[256];
00373     mutable int lastReturnedToken;
00374   };
00375 
00376   typedef std::map<const std::string, TokenType> TokenMap;
00377   TokenMap itsSpeechTokenMap;
00378   mutable pthread_mutex_t itsSpeechTokenMapMutex;
00379 
00380   Stats itsStats;
00381 
00382   /// returns false if there is no such file; parsing errors will generate exceptions
00383   static bool readSpeechFile(TokenMap& tokenMap,
00384                              const std::string& fname);
00385   static void saveSpeechFile(const TokenMap& tokenMap,
00386                              const std::string& fname);
00387 };
00388 
00389 // ######################################################################
00390 /* So things look consistent in everyone's emacs... */
00391 /* Local Variables: */
00392 /* mode: c++ */
00393 /* indent-tabs-mode: nil */
00394 /* End: */
00395 
00396 #endif // NEURO_NEOBRAIN_H_DEFINED