/*!@file Neuro/NeoBrainVss.H for the vss demo*/

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
// 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/NeoBrain.H $ 00035 // $Id: NeoBrain.H 13716 2010-07-28 22:07:03Z itti $ 00036 // 00037 00038 #ifndef NEURO_NEOBRAIN_H_DEFINED 00039 #define NEURO_NEOBRAIN_H_DEFINED 00040 00041 #include "Image/OpenCVUtil.H" // must be first to avoid conflicting defs of int64, uint64 00042 00043 #include "Component/ModelComponent.H" 00044 #include "Component/ModelParam.H" 00045 #include "Devices/BeoHead.H" 00046 #include "Devices/SpeechSynth.H" 00047 #include "Image/ColorOps.H" 00048 #include "Image/DrawOps.H" 00049 #include "Image/CutPaste.H" 00050 #include "Image/Image.H" 00051 #include "Image/Pixels.H" 00052 #include "Image/OpenCVUtil.H" 00053 #include "Component/ParamMap.H" 00054 #include "Util/sformat.H" 00055 #include "rutz/mutex.h" 00056 #include "Util/WorkThreadServer.H" 00057 #include "Util/JobWithSemaphore.H" 00058 00059 #include <pthread.h> 00060 #include <string> 00061 #include <vector> 00062 #include <map> 00063 00064 //Define VSS_DEMO so that neovision2.C would include some spacific functions 00065 #define VSS_DEMO 1 00066 00067 class NeoBrain : public ModelComponent 00068 { 00069 public: 00070 enum State {CHECK_TARGET, TRACK_TARGET}; 00071 00072 //! Constructor 00073 NeoBrain(OptionManager& mgr, 00074 const std::string& descrName = "NeoBrain", 00075 const std::string& tagName = "NeoBrain"); 00076 00077 //! 
virtual destructor for safe inheritance 00078 virtual ~NeoBrain(); 00079 00080 void init(Dims imageDims, int nPoints = 1, int wz = 10); 00081 00082 //tracking 00083 void setTarget(Point2D<int> loc, const Image<byte>& grey, 00084 int saliencyval=-1, bool changeState = false, bool forceNewLocation = false); 00085 void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb, 00086 int saliencyval=-1, bool changeState = false, bool forceNewLocation = false) 00087 { 00088 if (itsAllowTracking.getVal()) 00089 this->setTarget(loc, luminance(rgb), saliencyval, changeState, forceNewLocation); 00090 } 00091 Point2D<int> trackObject(const Image<byte>& grey); 00092 std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey); 00093 Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb) 00094 { 00095 if (itsAllowTracking.getVal()) 00096 return this->trackObject(luminance(rgb)); 00097 else return Point2D<int>(-1,-1); 00098 } 00099 00100 00101 00102 void saySaliencyVal(byte val); 00103 00104 void moveHeadToTarget(); 00105 00106 void updateBoringness(const Image<byte>& salmap, byte foaval); 00107 00108 void updateExcitement(double vcxflicker); 00109 00110 float getBoringness() const; 00111 00112 float getExcitementLevel() const; 00113 float getSleepLevel() const; 00114 00115 bool sayText(const std::string& text, int priority, bool block = false) const; 00116 00117 std::string getToken(const std::string& token, int val) const; 00118 00119 bool sayToken(const std::string& token, int val, int priority) const; 00120 00121 bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false); 00122 00123 struct Stats 00124 { 00125 unsigned long bigerrframes, bigerrframes_thresh; 00126 unsigned long targetframes, targetframes_thresh; 00127 unsigned long nomoveframes, nomoveframes_thresh; 00128 unsigned long stopframes, stopframes_thresh; 00129 float last_err, last_xerr, last_yerr, err_tol, dist_tol; 00130 float leftEyePanPos, leftEyeTiltPos; 00131 float 
rightEyePanPos, rightEyeTiltPos; 00132 float headPanPos, headTiltPos, headYawPos; 00133 }; 00134 00135 Stats getStats() const { return itsStats; } 00136 00137 Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims, 00138 const PixRGB<byte>& back, 00139 const PixRGB<byte>& fore) 00140 { 00141 if (!itsSpeakSaliency.getVal()) 00142 { 00143 Image<PixRGB<byte> > result(dims, NO_INIT); 00144 result.clear(back); 00145 return result; 00146 } 00147 00148 TokenType ttype; 00149 { 00150 GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex); 00151 TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target"); 00152 if (itr == itsSpeechTokenMap.end()) 00153 LERROR("couldn't find new_target token type"); 00154 else 00155 ttype = (*itr).second; 00156 } 00157 return ttype.makeTokenHistogram(dims, back, fore); 00158 } 00159 00160 //for setting model params 00161 00162 bool getUseHead() {return itsUseHead.getVal(); } 00163 bool getRelaxNeck() {return itsRelaxNeck.getVal(); } 00164 int getBoringnessThresh() {return itsBoringnessThresh.getVal(); } 00165 int getErrTolerance() {return itsErrTolerance.getVal(); } 00166 int getDistTolerance() {return itsDistTolerance.getVal(); } 00167 00168 long getTrackDelayFrames() { return itsTrackDelayFrames.getVal(); } 00169 long getBigErrFramesThresh() { return itsBigErrFramesThresh.getVal(); } 00170 long getTargetFramesThresh() { return itsTargetFramesThresh.getVal(); } 00171 long getNoMoveFramesThresh() { return itsNoMoveFramesThresh.getVal(); } 00172 long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); } 00173 00174 00175 void gotoSleep(); 00176 void wakeUp(); 00177 bool isSleeping() { return itsSleeping.getVal();} 00178 00179 void setUseHead(bool val) { itsUseHead.setVal(val); } 00180 void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); } 00181 void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); } 00182 void setErrTolerance(int val) { itsErrTolerance.setVal(val); } 00183 void setDistTolerance(int val) { 
itsDistTolerance.setVal(val); } 00184 00185 void setTrackDelayFrames(long val) { itsTrackDelayFrames.setVal(val); } 00186 void setBigErrFramesThresh(long val) { itsBigErrFramesThresh.setVal(val); } 00187 void setTargetFramesThresh(long val) { itsTargetFramesThresh.setVal(val); } 00188 void setNoMoveFramesThresh(long val) { itsNoMoveFramesThresh.setVal(val); } 00189 void setStopTrackDelayFrames(long val) { itsStopTrackDelayFrames.setVal(val); } 00190 void setKeepTracking(bool val) { itsKeepTracking.setVal(val); } 00191 00192 void relaxHead(){itsBeoHead->relaxHead();}//stop all servo 00193 protected: 00194 virtual void start2(); 00195 00196 void paramChanged(ModelParamBase* const param, 00197 const bool valueChanged, 00198 ParamClient::ChangeStatus* status); 00199 00200 private: 00201 void enterCheckTargetState(); 00202 00203 OModelParam<bool> itsAllowTracking; 00204 OModelParam<bool> itsKeepTracking; //Keep the tracking the target at much as posible 00205 NModelParam<bool> itsUseHead; 00206 NModelParam<bool> itsRelaxNeck; 00207 NModelParam<bool> itsSleeping; 00208 OModelParam<int> itsBoringnessThresh; 00209 NModelParam<unsigned int> itsErrTolerance; 00210 NModelParam<unsigned int> itsDistTolerance; 00211 00212 OModelParam<unsigned long> itsTrackDelayFrames; 00213 NModelParam<unsigned long> itsBigErrFramesThresh; 00214 NModelParam<unsigned long> itsTargetFramesThresh; 00215 NModelParam<unsigned long> itsNoMoveFramesThresh; 00216 OModelParam<int> itsStopTrackDelayFrames; 00217 00218 NModelParam<float> itsHeadInfoEyeTiltPos; 00219 NModelParam<float> itsHeadInfoEyePanPos; 00220 NModelParam<float> itsHeadInfoHeadPanPos; 00221 00222 OModelParam<bool> itsSpeakSaliency; 00223 OModelParam<bool> itsSpeakObjects; 00224 OModelParam<std::string> itsSpeechFile; 00225 00226 NModelParam<bool> itsRefreshSpeechFile; 00227 00228 NModelParam<float> itsExcitementThresh; 00229 00230 nub::soft_ref<BeoHead> itsBeoHead; 00231 nub::soft_ref<SpeechSynth> itsSpeechSynth; 00232 00233 unsigned 
long itsTargetFrames; 00234 unsigned long itsBigErrFrames; 00235 unsigned long itsNoMoveFrames; 00236 unsigned long itsStopFrames; 00237 unsigned long itsHeadInfoFrames; 00238 00239 int win_size; 00240 00241 #ifdef HAVE_OPENCV 00242 int MAX_COUNT; 00243 int count; 00244 CvPoint2D32f* points[2], *swap_points; 00245 IplImage *pyramid; 00246 IplImage *prev_pyramid; 00247 Image<byte> prev_grey; 00248 char* status; 00249 #endif 00250 int flags; 00251 State itsState; 00252 Dims itsImageDims; 00253 bool itsTracking; 00254 00255 float itsPrevTargetX; 00256 float itsPrevTargetY; 00257 00258 float itsBoringness; 00259 int itsBoringCount; 00260 float itsExcitementLevel; 00261 float itsSleep; 00262 int itsPrepSleep; 00263 00264 bool itsAlmostSinging; 00265 00266 std::string itsLastSpokenLabel; 00267 00268 struct SpeechToken 00269 { 00270 SpeechToken() : low(0), high(100), nspoken(0) {} 00271 00272 std::vector<std::string> textList; 00273 int low; 00274 int high; 00275 mutable int nspoken; 00276 }; 00277 00278 struct TokenType 00279 { 00280 TokenType() 00281 : 00282 lastReturnedToken(0) 00283 { 00284 for (int i = 0; i < 256; ++i) this->histo[i] = 0; 00285 } 00286 00287 std::string getTextItemForVal(int val) const 00288 { 00289 if (val < 0) 00290 LERROR("val too low (%d)", val); 00291 else if (val > 255) 00292 LERROR("val too high (%d)", val); 00293 00294 this->histo[std::min(255, std::max(0,val))]++; 00295 00296 int percentile=0, total=0; 00297 00298 for (int i = 0; i < 256; ++i) 00299 { 00300 if (i <= val) 00301 percentile += this->histo[i]; 00302 total += this->histo[i]; 00303 } 00304 00305 const int fpercentile = int(100.0 * double(percentile) / double(total)); 00306 00307 for (size_t i = 0; i < this->tokens.size(); ++i) 00308 { 00309 if (fpercentile >= this->tokens.at(i).low 00310 && fpercentile <= this->tokens.at(i).high) 00311 { 00312 int n = randomUpToNotIncluding 00313 (this->tokens.at(i).textList.size()); 00314 00315 if (n == lastReturnedToken) 00316 n = (n+1) % 
this->tokens.at(i).textList.size(); 00317 00318 if (n < 0) 00319 { LERROR("n too low (%d)", n); n = 0; } 00320 else if (n >= int(this->tokens.at(i).textList.size())) 00321 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); } 00322 00323 lastReturnedToken = n; 00324 00325 LDEBUG("fpercentile = %d; returning %s", 00326 fpercentile, this->tokens.at(i).textList.at(n).c_str()); 00327 00328 this->tokens.at(i).nspoken++; 00329 00330 return this->tokens.at(i).textList.at(n); 00331 } 00332 } 00333 00334 return std::string(); 00335 } 00336 00337 Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims, 00338 const PixRGB<byte>& back, 00339 const PixRGB<byte>& fore) 00340 { 00341 Image<PixRGB<byte> > result(dims, NO_INIT); 00342 result.clear(back); 00343 00344 int smallhisto[16] = { 0 }; 00345 for (int i = 0; i < 256; ++i) 00346 smallhisto[(i * 16) / 256] += this->histo[i]; 00347 00348 int maxval = 1; 00349 for (size_t i = 0; i < 16; ++i) 00350 if (smallhisto[i] > maxval) maxval = smallhisto[i]; 00351 00352 const int top = std::max(1, dims.h() - 10); 00353 for (size_t i = 0; i < 16; ++i) 00354 { 00355 const int left = (i * dims.w()) / 16; 00356 const int right = 00357 std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1); 00358 00359 for (int x = left; x <= right; ++x) 00360 { 00361 drawLine(result, 00362 Point2D<int>(x, dims.h() - 1), 00363 Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval), 00364 fore, 1); 00365 } 00366 } 00367 00368 return result; 00369 } 00370 00371 std::vector<SpeechToken> tokens; 00372 mutable int histo[256]; 00373 mutable int lastReturnedToken; 00374 }; 00375 00376 typedef std::map<const std::string, TokenType> TokenMap; 00377 TokenMap itsSpeechTokenMap; 00378 mutable pthread_mutex_t itsSpeechTokenMapMutex; 00379 00380 Stats itsStats; 00381 00382 /// returns false if there is no such file; parsing errors will generate exceptions 00383 static bool readSpeechFile(TokenMap& tokenMap, 00384 const std::string& fname); 
00385 static void saveSpeechFile(const TokenMap& tokenMap, 00386 const std::string& fname); 00387 }; 00388 00389 // ###################################################################### 00390 /* So things look consistent in everyone's emacs... */ 00391 /* Local Variables: */ 00392 /* mode: c++ */ 00393 /* indent-tabs-mode: nil */ 00394 /* End: */ 00395 00396 #endif // NEURO_NEOBRAIN_H_DEFINED