/*!@file Neuro/NeoBrainVss.H for the vss demo*/

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
// 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Neuro/NeoBrainVss.H $ 00035 // $Id: NeoBrainVss.H 10794 2009-02-08 06:21:09Z itti $ 00036 // 00037 00038 #ifndef NEURO_NEOBRAIN_H_DEFINED 00039 #define NEURO_NEOBRAIN_H_DEFINED 00040 00041 #include "Component/ModelComponent.H" 00042 #include "Component/ModelParam.H" 00043 #include "Devices/BeoHead.H" 00044 #include "Devices/SpeechSynth.H" 00045 #include "Image/ColorOps.H" 00046 #include "Image/DrawOps.H" 00047 #include "Image/CutPaste.H" 00048 #include "Image/Image.H" 00049 #include "Image/Pixels.H" 00050 #include "Image/OpenCVUtil.H" 00051 #include "Component/ParamMap.H" 00052 #include "Util/sformat.H" 00053 #include "rutz/mutex.h" 00054 #include "Util/WorkThreadServer.H" 00055 #include "Util/JobWithSemaphore.H" 00056 00057 #include <pthread.h> 00058 #include <string> 00059 #include <vector> 00060 #include <map> 00061 00062 //Define VSS_DEMO so that neovision2.C would include some spacific functions 00063 #define VSS_DEMO 1 00064 00065 class NeoBrain : public ModelComponent 00066 { 00067 public: 00068 enum State {CHECK_TARGET, TRACK_TARGET}; 00069 00070 //! Constructor 00071 NeoBrain(OptionManager& mgr, 00072 const std::string& descrName = "NeoBrain", 00073 const std::string& tagName = "NeoBrain"); 00074 00075 //! 
virtual destructor for safe inheritance 00076 virtual ~NeoBrain(); 00077 00078 void init(Dims imageDims, int nPoints = 1, int wz = 10); 00079 00080 //tracking 00081 void setTarget(Point2D<int> loc, const Image<byte>& grey, 00082 int saliencyval=-1, bool changeState = true); 00083 void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb, 00084 int saliencyval=-1, bool changeState = true) 00085 { 00086 if (itsAllowTracking.getVal()) 00087 this->setTarget(loc, luminance(rgb), saliencyval, changeState); 00088 } 00089 Point2D<int> trackObject(const Image<byte>& grey); 00090 std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey); 00091 Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb) 00092 { 00093 if (itsAllowTracking.getVal()) 00094 return this->trackObject(luminance(rgb)); 00095 else return Point2D<int>(-1,-1); 00096 } 00097 00098 00099 00100 void saySaliencyVal(byte val); 00101 00102 void moveHeadToTarget(); 00103 00104 void updateBoringness(const Image<byte>& salmap, byte foaval); 00105 00106 void updateExcitement(double vcxflicker); 00107 00108 float getBoringness() const; 00109 00110 float getExcitementLevel() const; 00111 float getSleepLevel() const; 00112 00113 bool sayText(const std::string& text, int priority, bool block = false) const; 00114 00115 std::string getToken(const std::string& token, int val) const; 00116 00117 bool sayToken(const std::string& token, int val, int priority) const; 00118 00119 bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false); 00120 00121 struct Stats 00122 { 00123 unsigned long bigerrframes, bigerrframes_thresh; 00124 unsigned long targetframes, targetframes_thresh; 00125 unsigned long nomoveframes, nomoveframes_thresh; 00126 unsigned long stopframes, stopframes_thresh; 00127 float last_err, last_xerr, last_yerr, err_tol, dist_tol; 00128 float leftEyePanPos, leftEyeTiltPos; 00129 float rightEyePanPos, rightEyeTiltPos; 00130 float headPanPos, headTiltPos, headYawPos; 00131 
}; 00132 00133 Stats getStats() const { return itsStats; } 00134 00135 Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims, 00136 const PixRGB<byte>& back, 00137 const PixRGB<byte>& fore) 00138 { 00139 if (!itsSpeakSaliency.getVal()) 00140 { 00141 Image<PixRGB<byte> > result(dims, NO_INIT); 00142 result.clear(back); 00143 return result; 00144 } 00145 00146 TokenType ttype; 00147 { 00148 GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex); 00149 TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target"); 00150 if (itr == itsSpeechTokenMap.end()) 00151 LERROR("couldn't find new_target token type"); 00152 else 00153 ttype = (*itr).second; 00154 } 00155 return ttype.makeTokenHistogram(dims, back, fore); 00156 } 00157 00158 //for setting model params 00159 00160 bool getUseHead() {return itsUseHead.getVal(); } 00161 bool getRelaxNeck() {return itsRelaxNeck.getVal(); } 00162 int getBoringnessThresh() {return itsBoringnessThresh.getVal(); } 00163 int getErrTolerance() {return itsErrTolerance.getVal(); } 00164 int getDistTolerance() {return itsDistTolerance.getVal(); } 00165 00166 long getTrackDelayFrames() { return itsTrackDelayFrames.getVal(); } 00167 long getBigErrFramesThresh() { return itsBigErrFramesThresh.getVal(); } 00168 long getTargetFramesThresh() { return itsTargetFramesThresh.getVal(); } 00169 long getNoMoveFramesThresh() { return itsNoMoveFramesThresh.getVal(); } 00170 long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); } 00171 00172 00173 void gotoSleep(); 00174 void wakeUp(); 00175 bool isSleeping() { return itsSleeping.getVal();} 00176 00177 void setUseHead(bool val) { itsUseHead.setVal(val); } 00178 void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); } 00179 void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); } 00180 void setErrTolerance(int val) { itsErrTolerance.setVal(val); } 00181 void setDistTolerance(int val) { itsDistTolerance.setVal(val); } 00182 00183 void setTrackDelayFrames(long val) { 
itsTrackDelayFrames.setVal(val); } 00184 void setBigErrFramesThresh(long val) { itsBigErrFramesThresh.setVal(val); } 00185 void setTargetFramesThresh(long val) { itsTargetFramesThresh.setVal(val); } 00186 void setNoMoveFramesThresh(long val) { itsNoMoveFramesThresh.setVal(val); } 00187 void setStopTrackDelayFrames(long val) { itsStopTrackDelayFrames.setVal(val); } 00188 void setKeepTracking(bool val) { itsKeepTracking.setVal(val); } 00189 00190 void relaxHead(){itsBeoHead->relaxHead();}//stop all servo 00191 protected: 00192 virtual void start2(); 00193 00194 void paramChanged(ModelParamBase* const param, 00195 const bool valueChanged, 00196 ParamClient::ChangeStatus* status); 00197 00198 private: 00199 void enterCheckTargetState(); 00200 00201 OModelParam<bool> itsAllowTracking; 00202 OModelParam<bool> itsKeepTracking; //Keep the tracking the target at much as posible 00203 NModelParam<bool> itsUseHead; 00204 NModelParam<bool> itsRelaxNeck; 00205 NModelParam<bool> itsSleeping; 00206 OModelParam<int> itsBoringnessThresh; 00207 NModelParam<unsigned int> itsErrTolerance; 00208 NModelParam<unsigned int> itsDistTolerance; 00209 00210 OModelParam<unsigned long> itsTrackDelayFrames; 00211 NModelParam<unsigned long> itsBigErrFramesThresh; 00212 NModelParam<unsigned long> itsTargetFramesThresh; 00213 NModelParam<unsigned long> itsNoMoveFramesThresh; 00214 OModelParam<int> itsStopTrackDelayFrames; 00215 00216 NModelParam<float> itsHeadInfoEyeTiltPos; 00217 NModelParam<float> itsHeadInfoEyePanPos; 00218 NModelParam<float> itsHeadInfoHeadPanPos; 00219 00220 OModelParam<bool> itsSpeakSaliency; 00221 OModelParam<bool> itsSpeakObjects; 00222 OModelParam<std::string> itsSpeechFile; 00223 00224 NModelParam<bool> itsRefreshSpeechFile; 00225 00226 NModelParam<float> itsExcitementThresh; 00227 00228 nub::soft_ref<BeoHead> itsBeoHead; 00229 nub::soft_ref<SpeechSynth> itsSpeechSynth; 00230 00231 unsigned long itsTargetFrames; 00232 unsigned long itsBigErrFrames; 00233 unsigned long 
itsNoMoveFrames; 00234 unsigned long itsStopFrames; 00235 unsigned long itsHeadInfoFrames; 00236 00237 int win_size; 00238 00239 #ifdef HAVE_OPENCV 00240 int MAX_COUNT; 00241 int count; 00242 CvPoint2D32f* points[2], *swap_points; 00243 IplImage *pyramid; 00244 IplImage *prev_pyramid; 00245 Image<byte> prev_grey; 00246 char* status; 00247 #endif 00248 int flags; 00249 State itsState; 00250 Dims itsImageDims; 00251 bool itsTracking; 00252 00253 float itsPrevTargetX; 00254 float itsPrevTargetY; 00255 00256 float itsBoringness; 00257 int itsBoringCount; 00258 float itsExcitementLevel; 00259 float itsSleep; 00260 int itsPrepSleep; 00261 00262 bool itsAlmostSinging; 00263 00264 std::string itsLastSpokenLabel; 00265 00266 struct SpeechToken 00267 { 00268 SpeechToken() : low(0), high(100), nspoken(0) {} 00269 00270 std::vector<std::string> textList; 00271 int low; 00272 int high; 00273 mutable int nspoken; 00274 }; 00275 00276 struct TokenType 00277 { 00278 TokenType() 00279 : 00280 lastReturnedToken(0) 00281 { 00282 for (int i = 0; i < 256; ++i) this->histo[i] = 0; 00283 } 00284 00285 std::string getTextItemForVal(int val) const 00286 { 00287 if (val < 0) 00288 LERROR("val too low (%d)", val); 00289 else if (val > 255) 00290 LERROR("val too high (%d)", val); 00291 00292 this->histo[std::min(255, std::max(0,val))]++; 00293 00294 int percentile=0, total=0; 00295 00296 for (int i = 0; i < 256; ++i) 00297 { 00298 if (i <= val) 00299 percentile += this->histo[i]; 00300 total += this->histo[i]; 00301 } 00302 00303 const int fpercentile = int(100.0 * double(percentile) / double(total)); 00304 00305 for (size_t i = 0; i < this->tokens.size(); ++i) 00306 { 00307 if (fpercentile >= this->tokens.at(i).low 00308 && fpercentile <= this->tokens.at(i).high) 00309 { 00310 int n = randomUpToNotIncluding 00311 (this->tokens.at(i).textList.size()); 00312 00313 if (n == lastReturnedToken) 00314 n = (n+1) % this->tokens.at(i).textList.size(); 00315 00316 if (n < 0) 00317 { LERROR("n too low 
(%d)", n); n = 0; } 00318 else if (n >= int(this->tokens.at(i).textList.size())) 00319 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); } 00320 00321 lastReturnedToken = n; 00322 00323 LDEBUG("fpercentile = %d; returning %s", 00324 fpercentile, this->tokens.at(i).textList.at(n).c_str()); 00325 00326 this->tokens.at(i).nspoken++; 00327 00328 return this->tokens.at(i).textList.at(n); 00329 } 00330 } 00331 00332 return std::string(); 00333 } 00334 00335 Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims, 00336 const PixRGB<byte>& back, 00337 const PixRGB<byte>& fore) 00338 { 00339 Image<PixRGB<byte> > result(dims, NO_INIT); 00340 result.clear(back); 00341 00342 int smallhisto[16] = { 0 }; 00343 for (int i = 0; i < 256; ++i) 00344 smallhisto[(i * 16) / 256] += this->histo[i]; 00345 00346 int maxval = 1; 00347 for (size_t i = 0; i < 16; ++i) 00348 if (smallhisto[i] > maxval) maxval = smallhisto[i]; 00349 00350 const int top = std::max(1, dims.h() - 10); 00351 for (size_t i = 0; i < 16; ++i) 00352 { 00353 const int left = (i * dims.w()) / 16; 00354 const int right = 00355 std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1); 00356 00357 for (int x = left; x <= right; ++x) 00358 { 00359 drawLine(result, 00360 Point2D<int>(x, dims.h() - 1), 00361 Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval), 00362 fore, 1); 00363 } 00364 } 00365 00366 return result; 00367 } 00368 00369 std::vector<SpeechToken> tokens; 00370 mutable int histo[256]; 00371 mutable int lastReturnedToken; 00372 }; 00373 00374 typedef std::map<const std::string, TokenType> TokenMap; 00375 TokenMap itsSpeechTokenMap; 00376 mutable pthread_mutex_t itsSpeechTokenMapMutex; 00377 00378 Stats itsStats; 00379 00380 /// returns false if there is no such file; parsing errors will generate exceptions 00381 static bool readSpeechFile(TokenMap& tokenMap, 00382 const std::string& fname); 00383 static void saveSpeechFile(const TokenMap& tokenMap, 00384 const std::string& 
fname); 00385 }; 00386 00387 // ###################################################################### 00388 /* So things look consistent in everyone's emacs... */ 00389 /* Local Variables: */ 00390 /* mode: c++ */ 00391 /* indent-tabs-mode: nil */ 00392 /* End: */ 00393 00394 #endif // NEURO_NEOBRAIN_H_DEFINED