NeoBrainVss.H

Go to the documentation of this file.
00001 /*!@file Neuro/NeoBrainVss.H for the vss demo*/
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/NeoBrainVss.H $
00035 // $Id: NeoBrainVss.H 10794 2009-02-08 06:21:09Z itti $
00036 //
00037 
00038 #ifndef NEURO_NEOBRAIN_H_DEFINED
00039 #define NEURO_NEOBRAIN_H_DEFINED
00040 
00041 #include "Component/ModelComponent.H"
00042 #include "Component/ModelParam.H"
00043 #include "Devices/BeoHead.H"
00044 #include "Devices/SpeechSynth.H"
00045 #include "Image/ColorOps.H"
00046 #include "Image/DrawOps.H"
00047 #include "Image/CutPaste.H"
00048 #include "Image/Image.H"
00049 #include "Image/Pixels.H"
00050 #include "Image/OpenCVUtil.H"
00051 #include "Component/ParamMap.H"
00052 #include "Util/sformat.H"
00053 #include "rutz/mutex.h"
00054 #include "Util/WorkThreadServer.H"
00055 #include "Util/JobWithSemaphore.H"
00056 
00057 #include <pthread.h>
00058 #include <string>
00059 #include <vector>
00060 #include <map>
00061 
00062 //Define VSS_DEMO so that neovision2.C would include some specific functions
00063 #define VSS_DEMO 1
00064 
00065 class NeoBrain : public ModelComponent
00066 {
00067 public:
00068   enum State {CHECK_TARGET, TRACK_TARGET};
00069 
00070   //! Constructor
00071   NeoBrain(OptionManager& mgr,
00072            const std::string& descrName = "NeoBrain",
00073            const std::string& tagName = "NeoBrain");
00074 
00075   //! virtual destructor for safe inheritance
00076   virtual ~NeoBrain();
00077 
00078   void init(Dims imageDims, int nPoints = 1, int wz = 10);
00079 
00080   //tracking
00081   void setTarget(Point2D<int> loc, const Image<byte>& grey,
00082       int saliencyval=-1, bool changeState = true);
00083   void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb,
00084       int saliencyval=-1, bool changeState = true)
00085   {
00086     if (itsAllowTracking.getVal())
00087       this->setTarget(loc, luminance(rgb), saliencyval, changeState);
00088   }
00089   Point2D<int> trackObject(const Image<byte>& grey);
00090   std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey);
00091   Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb)
00092   {
00093     if (itsAllowTracking.getVal())
00094       return this->trackObject(luminance(rgb));
00095     else return Point2D<int>(-1,-1);
00096   }
00097 
00098 
00099 
00100   void saySaliencyVal(byte val);
00101 
00102   void moveHeadToTarget();
00103 
00104   void updateBoringness(const Image<byte>& salmap, byte foaval);
00105 
00106   void updateExcitement(double vcxflicker);
00107 
00108   float getBoringness() const;
00109 
00110   float getExcitementLevel() const;
00111   float getSleepLevel() const;
00112 
00113   bool sayText(const std::string& text, int priority, bool block = false) const;
00114 
00115   std::string getToken(const std::string& token, int val) const;
00116 
00117   bool sayToken(const std::string& token, int val, int priority) const;
00118 
00119   bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false);
00120 
00121   struct Stats
00122   {
00123     unsigned long bigerrframes, bigerrframes_thresh;
00124     unsigned long targetframes, targetframes_thresh;
00125     unsigned long nomoveframes, nomoveframes_thresh;
00126     unsigned long stopframes, stopframes_thresh;
00127     float last_err, last_xerr, last_yerr, err_tol, dist_tol;
00128     float leftEyePanPos, leftEyeTiltPos;
00129     float rightEyePanPos, rightEyeTiltPos;
00130     float headPanPos, headTiltPos, headYawPos;
00131   };
00132 
00133   Stats getStats() const { return itsStats; }
00134 
00135   Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims,
00136                                         const PixRGB<byte>& back,
00137                                         const PixRGB<byte>& fore)
00138   {
00139     if (!itsSpeakSaliency.getVal())
00140       {
00141         Image<PixRGB<byte> > result(dims, NO_INIT);
00142         result.clear(back);
00143         return result;
00144       }
00145 
00146     TokenType ttype;
00147     {
00148       GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex);
00149       TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target");
00150       if (itr == itsSpeechTokenMap.end())
00151         LERROR("couldn't find new_target token type");
00152       else
00153         ttype = (*itr).second;
00154     }
00155     return ttype.makeTokenHistogram(dims, back, fore);
00156   }
00157 
00158   //for setting model params
00159 
00160  bool getUseHead() {return itsUseHead.getVal(); }
00161  bool getRelaxNeck() {return itsRelaxNeck.getVal(); }
00162  int  getBoringnessThresh() {return itsBoringnessThresh.getVal(); }
00163  int  getErrTolerance() {return itsErrTolerance.getVal(); }
00164  int  getDistTolerance() {return itsDistTolerance.getVal(); }
00165 
00166  long getTrackDelayFrames()     { return itsTrackDelayFrames.getVal(); }
00167  long getBigErrFramesThresh()   { return itsBigErrFramesThresh.getVal(); }
00168  long getTargetFramesThresh()   { return itsTargetFramesThresh.getVal(); }
00169  long getNoMoveFramesThresh()   { return itsNoMoveFramesThresh.getVal(); }
00170  long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); }
00171 
00172 
00173  void gotoSleep();
00174  void wakeUp();
00175  bool isSleeping() { return itsSleeping.getVal();}
00176 
00177  void setUseHead(bool val) { itsUseHead.setVal(val); }
00178  void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); }
00179  void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); }
00180  void setErrTolerance(int val) { itsErrTolerance.setVal(val); }
00181  void setDistTolerance(int val) { itsDistTolerance.setVal(val); }
00182 
00183  void setTrackDelayFrames(long val)     {  itsTrackDelayFrames.setVal(val); }
00184  void setBigErrFramesThresh(long val)   {  itsBigErrFramesThresh.setVal(val); }
00185  void setTargetFramesThresh(long val)   {  itsTargetFramesThresh.setVal(val); }
00186  void setNoMoveFramesThresh(long val)   {  itsNoMoveFramesThresh.setVal(val); }
00187  void setStopTrackDelayFrames(long val) {  itsStopTrackDelayFrames.setVal(val); }
00188  void setKeepTracking(bool val) { itsKeepTracking.setVal(val); }
00189 
00190  void relaxHead(){itsBeoHead->relaxHead();}//stop all servo
00191 protected:
00192   virtual void start2();
00193 
00194   void paramChanged(ModelParamBase* const param,
00195                     const bool valueChanged,
00196                     ParamClient::ChangeStatus* status);
00197 
00198 private:
00199   void enterCheckTargetState();
00200 
00201   OModelParam<bool> itsAllowTracking;
00202   OModelParam<bool> itsKeepTracking; //Keep the tracking the target at much as posible
00203   NModelParam<bool> itsUseHead;
00204   NModelParam<bool> itsRelaxNeck;
00205   NModelParam<bool> itsSleeping;
00206   OModelParam<int> itsBoringnessThresh;
00207   NModelParam<unsigned int> itsErrTolerance;
00208   NModelParam<unsigned int> itsDistTolerance;
00209 
00210   OModelParam<unsigned long> itsTrackDelayFrames;
00211   NModelParam<unsigned long> itsBigErrFramesThresh;
00212   NModelParam<unsigned long> itsTargetFramesThresh;
00213   NModelParam<unsigned long> itsNoMoveFramesThresh;
00214   OModelParam<int> itsStopTrackDelayFrames;
00215 
00216   NModelParam<float> itsHeadInfoEyeTiltPos;
00217   NModelParam<float> itsHeadInfoEyePanPos;
00218   NModelParam<float> itsHeadInfoHeadPanPos;
00219 
00220   OModelParam<bool> itsSpeakSaliency;
00221   OModelParam<bool> itsSpeakObjects;
00222   OModelParam<std::string> itsSpeechFile;
00223 
00224   NModelParam<bool> itsRefreshSpeechFile;
00225 
00226   NModelParam<float> itsExcitementThresh;
00227 
00228   nub::soft_ref<BeoHead> itsBeoHead;
00229   nub::soft_ref<SpeechSynth> itsSpeechSynth;
00230 
00231   unsigned long itsTargetFrames;
00232   unsigned long itsBigErrFrames;
00233   unsigned long itsNoMoveFrames;
00234   unsigned long itsStopFrames;
00235   unsigned long itsHeadInfoFrames;
00236 
00237   int win_size;
00238 
00239 #ifdef HAVE_OPENCV
00240   int MAX_COUNT;
00241   int count;
00242   CvPoint2D32f* points[2], *swap_points;
00243   IplImage *pyramid;
00244   IplImage *prev_pyramid;
00245   Image<byte> prev_grey;
00246   char* status;
00247 #endif
00248   int flags;
00249   State itsState;
00250   Dims itsImageDims;
00251   bool itsTracking;
00252 
00253   float itsPrevTargetX;
00254   float itsPrevTargetY;
00255 
00256   float itsBoringness;
00257   int itsBoringCount;
00258   float itsExcitementLevel;
00259   float itsSleep;
00260   int itsPrepSleep;
00261 
00262   bool itsAlmostSinging;
00263 
00264   std::string itsLastSpokenLabel;
00265 
00266   struct SpeechToken
00267   {
00268     SpeechToken() : low(0), high(100), nspoken(0) {}
00269 
00270     std::vector<std::string> textList;
00271     int low;
00272     int high;
00273     mutable int nspoken;
00274   };
00275 
00276   struct TokenType
00277   {
00278     TokenType()
00279       :
00280       lastReturnedToken(0)
00281     {
00282       for (int i = 0; i < 256; ++i) this->histo[i] = 0;
00283     }
00284 
00285     std::string getTextItemForVal(int val) const
00286     {
00287       if (val < 0)
00288         LERROR("val too low (%d)", val);
00289       else if (val > 255)
00290         LERROR("val too high (%d)", val);
00291 
00292       this->histo[std::min(255, std::max(0,val))]++;
00293 
00294       int percentile=0, total=0;
00295 
00296       for (int i = 0; i < 256; ++i)
00297         {
00298           if (i <= val)
00299             percentile += this->histo[i];
00300           total += this->histo[i];
00301         }
00302 
00303       const int fpercentile = int(100.0 * double(percentile) / double(total));
00304 
00305       for (size_t i = 0; i < this->tokens.size(); ++i)
00306         {
00307           if (fpercentile >= this->tokens.at(i).low
00308               && fpercentile <= this->tokens.at(i).high)
00309             {
00310               int n = randomUpToNotIncluding
00311                 (this->tokens.at(i).textList.size());
00312 
00313               if (n == lastReturnedToken)
00314                 n = (n+1) % this->tokens.at(i).textList.size();
00315 
00316               if (n < 0)
00317                 { LERROR("n too low (%d)", n); n = 0; }
00318               else if (n >= int(this->tokens.at(i).textList.size()))
00319                 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); }
00320 
00321               lastReturnedToken = n;
00322 
00323               LDEBUG("fpercentile = %d; returning %s",
00324                      fpercentile, this->tokens.at(i).textList.at(n).c_str());
00325 
00326               this->tokens.at(i).nspoken++;
00327 
00328               return this->tokens.at(i).textList.at(n);
00329             }
00330         }
00331 
00332       return std::string();
00333     }
00334 
00335     Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims,
00336                                             const PixRGB<byte>& back,
00337                                             const PixRGB<byte>& fore)
00338     {
00339       Image<PixRGB<byte> > result(dims, NO_INIT);
00340       result.clear(back);
00341 
00342       int smallhisto[16] = { 0 };
00343       for (int i = 0; i < 256; ++i)
00344         smallhisto[(i * 16) / 256] += this->histo[i];
00345 
00346       int maxval = 1;
00347       for (size_t i = 0; i < 16; ++i)
00348           if (smallhisto[i] > maxval) maxval = smallhisto[i];
00349 
00350       const int top = std::max(1, dims.h() - 10);
00351       for (size_t i = 0; i < 16; ++i)
00352         {
00353           const int left = (i * dims.w()) / 16;
00354           const int right =
00355             std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1);
00356 
00357           for (int x = left; x <= right; ++x)
00358             {
00359               drawLine(result,
00360                        Point2D<int>(x, dims.h() - 1),
00361                        Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval),
00362                        fore, 1);
00363             }
00364         }
00365 
00366       return result;
00367     }
00368 
00369     std::vector<SpeechToken> tokens;
00370     mutable int histo[256];
00371     mutable int lastReturnedToken;
00372   };
00373 
00374   typedef std::map<const std::string, TokenType> TokenMap;
00375   TokenMap itsSpeechTokenMap;
00376   mutable pthread_mutex_t itsSpeechTokenMapMutex;
00377 
00378   Stats itsStats;
00379 
00380   /// returns false if there is no such file; parsing errors will generate exceptions
00381   static bool readSpeechFile(TokenMap& tokenMap,
00382                              const std::string& fname);
00383   static void saveSpeechFile(const TokenMap& tokenMap,
00384                              const std::string& fname);
00385 };
00386 
00387 // ######################################################################
00388 /* So things look consistent in everyone's emacs... */
00389 /* Local Variables: */
00390 /* mode: c++ */
00391 /* indent-tabs-mode: nil */
00392 /* End: */
00393 
00394 #endif // NEURO_NEOBRAIN_H_DEFINED