00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef NEURO_NEOBRAIN_H_DEFINED
00039 #define NEURO_NEOBRAIN_H_DEFINED
00040
00041 #include "Image/OpenCVUtil.H"
00042
00043 #include "Component/ModelComponent.H"
00044 #include "Component/ModelParam.H"
00045 #include "Devices/BeoHead.H"
00046 #include "Devices/SpeechSynth.H"
00047 #include "Image/ColorOps.H"
00048 #include "Image/DrawOps.H"
00049 #include "Image/CutPaste.H"
00050 #include "Image/Image.H"
00051 #include "Image/Pixels.H"
00052 #include "Image/OpenCVUtil.H"
00053 #include "Component/ParamMap.H"
00054 #include "Util/sformat.H"
00055 #include "rutz/mutex.h"
00056 #include "Util/WorkThreadServer.H"
00057 #include "Util/JobWithSemaphore.H"
00058
00059 #include <pthread.h>
00060 #include <string>
00061 #include <vector>
00062 #include <map>
00063
00064
00065 #define VSS_DEMO 1
00066
00067 class NeoBrain : public ModelComponent
00068 {
00069 public:
00070 enum State {CHECK_TARGET, TRACK_TARGET};
00071
00072
00073 NeoBrain(OptionManager& mgr,
00074 const std::string& descrName = "NeoBrain",
00075 const std::string& tagName = "NeoBrain");
00076
00077
00078 virtual ~NeoBrain();
00079
00080 void init(Dims imageDims, int nPoints = 1, int wz = 10);
00081
00082
00083 void setTarget(Point2D<int> loc, const Image<byte>& grey,
00084 int saliencyval=-1, bool changeState = false, bool forceNewLocation = false);
00085 void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb,
00086 int saliencyval=-1, bool changeState = false, bool forceNewLocation = false)
00087 {
00088 if (itsAllowTracking.getVal())
00089 this->setTarget(loc, luminance(rgb), saliencyval, changeState, forceNewLocation);
00090 }
00091 Point2D<int> trackObject(const Image<byte>& grey);
00092 std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey);
00093 Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb)
00094 {
00095 if (itsAllowTracking.getVal())
00096 return this->trackObject(luminance(rgb));
00097 else return Point2D<int>(-1,-1);
00098 }
00099
00100
00101
00102 void saySaliencyVal(byte val);
00103
00104 void moveHeadToTarget();
00105
00106 void updateBoringness(const Image<byte>& salmap, byte foaval);
00107
00108 void updateExcitement(double vcxflicker);
00109
00110 float getBoringness() const;
00111
00112 float getExcitementLevel() const;
00113 float getSleepLevel() const;
00114
00115 bool sayText(const std::string& text, int priority, bool block = false) const;
00116
00117 std::string getToken(const std::string& token, int val) const;
00118
00119 bool sayToken(const std::string& token, int val, int priority) const;
00120
00121 bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false);
00122
00123 struct Stats
00124 {
00125 unsigned long bigerrframes, bigerrframes_thresh;
00126 unsigned long targetframes, targetframes_thresh;
00127 unsigned long nomoveframes, nomoveframes_thresh;
00128 unsigned long stopframes, stopframes_thresh;
00129 float last_err, last_xerr, last_yerr, err_tol, dist_tol;
00130 float leftEyePanPos, leftEyeTiltPos;
00131 float rightEyePanPos, rightEyeTiltPos;
00132 float headPanPos, headTiltPos, headYawPos;
00133 };
00134
00135 Stats getStats() const { return itsStats; }
00136
00137 Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims,
00138 const PixRGB<byte>& back,
00139 const PixRGB<byte>& fore)
00140 {
00141 if (!itsSpeakSaliency.getVal())
00142 {
00143 Image<PixRGB<byte> > result(dims, NO_INIT);
00144 result.clear(back);
00145 return result;
00146 }
00147
00148 TokenType ttype;
00149 {
00150 GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex);
00151 TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target");
00152 if (itr == itsSpeechTokenMap.end())
00153 LERROR("couldn't find new_target token type");
00154 else
00155 ttype = (*itr).second;
00156 }
00157 return ttype.makeTokenHistogram(dims, back, fore);
00158 }
00159
00160
00161
00162 bool getUseHead() {return itsUseHead.getVal(); }
00163 bool getRelaxNeck() {return itsRelaxNeck.getVal(); }
00164 int getBoringnessThresh() {return itsBoringnessThresh.getVal(); }
00165 int getErrTolerance() {return itsErrTolerance.getVal(); }
00166 int getDistTolerance() {return itsDistTolerance.getVal(); }
00167
00168 long getTrackDelayFrames() { return itsTrackDelayFrames.getVal(); }
00169 long getBigErrFramesThresh() { return itsBigErrFramesThresh.getVal(); }
00170 long getTargetFramesThresh() { return itsTargetFramesThresh.getVal(); }
00171 long getNoMoveFramesThresh() { return itsNoMoveFramesThresh.getVal(); }
00172 long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); }
00173
00174
00175 void gotoSleep();
00176 void wakeUp();
00177 bool isSleeping() { return itsSleeping.getVal();}
00178
00179 void setUseHead(bool val) { itsUseHead.setVal(val); }
00180 void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); }
00181 void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); }
00182 void setErrTolerance(int val) { itsErrTolerance.setVal(val); }
00183 void setDistTolerance(int val) { itsDistTolerance.setVal(val); }
00184
00185 void setTrackDelayFrames(long val) { itsTrackDelayFrames.setVal(val); }
00186 void setBigErrFramesThresh(long val) { itsBigErrFramesThresh.setVal(val); }
00187 void setTargetFramesThresh(long val) { itsTargetFramesThresh.setVal(val); }
00188 void setNoMoveFramesThresh(long val) { itsNoMoveFramesThresh.setVal(val); }
00189 void setStopTrackDelayFrames(long val) { itsStopTrackDelayFrames.setVal(val); }
00190 void setKeepTracking(bool val) { itsKeepTracking.setVal(val); }
00191
00192 void relaxHead(){itsBeoHead->relaxHead();}
00193 protected:
00194 virtual void start2();
00195
00196 void paramChanged(ModelParamBase* const param,
00197 const bool valueChanged,
00198 ParamClient::ChangeStatus* status);
00199
00200 private:
00201 void enterCheckTargetState();
00202
00203 OModelParam<bool> itsAllowTracking;
00204 OModelParam<bool> itsKeepTracking;
00205 NModelParam<bool> itsUseHead;
00206 NModelParam<bool> itsRelaxNeck;
00207 NModelParam<bool> itsSleeping;
00208 OModelParam<int> itsBoringnessThresh;
00209 NModelParam<unsigned int> itsErrTolerance;
00210 NModelParam<unsigned int> itsDistTolerance;
00211
00212 OModelParam<unsigned long> itsTrackDelayFrames;
00213 NModelParam<unsigned long> itsBigErrFramesThresh;
00214 NModelParam<unsigned long> itsTargetFramesThresh;
00215 NModelParam<unsigned long> itsNoMoveFramesThresh;
00216 OModelParam<int> itsStopTrackDelayFrames;
00217
00218 NModelParam<float> itsHeadInfoEyeTiltPos;
00219 NModelParam<float> itsHeadInfoEyePanPos;
00220 NModelParam<float> itsHeadInfoHeadPanPos;
00221
00222 OModelParam<bool> itsSpeakSaliency;
00223 OModelParam<bool> itsSpeakObjects;
00224 OModelParam<std::string> itsSpeechFile;
00225
00226 NModelParam<bool> itsRefreshSpeechFile;
00227
00228 NModelParam<float> itsExcitementThresh;
00229
00230 nub::soft_ref<BeoHead> itsBeoHead;
00231 nub::soft_ref<SpeechSynth> itsSpeechSynth;
00232
00233 unsigned long itsTargetFrames;
00234 unsigned long itsBigErrFrames;
00235 unsigned long itsNoMoveFrames;
00236 unsigned long itsStopFrames;
00237 unsigned long itsHeadInfoFrames;
00238
00239 int win_size;
00240
00241 #ifdef HAVE_OPENCV
00242 int MAX_COUNT;
00243 int count;
00244 CvPoint2D32f* points[2], *swap_points;
00245 IplImage *pyramid;
00246 IplImage *prev_pyramid;
00247 Image<byte> prev_grey;
00248 char* status;
00249 #endif
00250 int flags;
00251 State itsState;
00252 Dims itsImageDims;
00253 bool itsTracking;
00254
00255 float itsPrevTargetX;
00256 float itsPrevTargetY;
00257
00258 float itsBoringness;
00259 int itsBoringCount;
00260 float itsExcitementLevel;
00261 float itsSleep;
00262 int itsPrepSleep;
00263
00264 bool itsAlmostSinging;
00265
00266 std::string itsLastSpokenLabel;
00267
00268 struct SpeechToken
00269 {
00270 SpeechToken() : low(0), high(100), nspoken(0) {}
00271
00272 std::vector<std::string> textList;
00273 int low;
00274 int high;
00275 mutable int nspoken;
00276 };
00277
00278 struct TokenType
00279 {
00280 TokenType()
00281 :
00282 lastReturnedToken(0)
00283 {
00284 for (int i = 0; i < 256; ++i) this->histo[i] = 0;
00285 }
00286
00287 std::string getTextItemForVal(int val) const
00288 {
00289 if (val < 0)
00290 LERROR("val too low (%d)", val);
00291 else if (val > 255)
00292 LERROR("val too high (%d)", val);
00293
00294 this->histo[std::min(255, std::max(0,val))]++;
00295
00296 int percentile=0, total=0;
00297
00298 for (int i = 0; i < 256; ++i)
00299 {
00300 if (i <= val)
00301 percentile += this->histo[i];
00302 total += this->histo[i];
00303 }
00304
00305 const int fpercentile = int(100.0 * double(percentile) / double(total));
00306
00307 for (size_t i = 0; i < this->tokens.size(); ++i)
00308 {
00309 if (fpercentile >= this->tokens.at(i).low
00310 && fpercentile <= this->tokens.at(i).high)
00311 {
00312 int n = randomUpToNotIncluding
00313 (this->tokens.at(i).textList.size());
00314
00315 if (n == lastReturnedToken)
00316 n = (n+1) % this->tokens.at(i).textList.size();
00317
00318 if (n < 0)
00319 { LERROR("n too low (%d)", n); n = 0; }
00320 else if (n >= int(this->tokens.at(i).textList.size()))
00321 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); }
00322
00323 lastReturnedToken = n;
00324
00325 LDEBUG("fpercentile = %d; returning %s",
00326 fpercentile, this->tokens.at(i).textList.at(n).c_str());
00327
00328 this->tokens.at(i).nspoken++;
00329
00330 return this->tokens.at(i).textList.at(n);
00331 }
00332 }
00333
00334 return std::string();
00335 }
00336
00337 Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims,
00338 const PixRGB<byte>& back,
00339 const PixRGB<byte>& fore)
00340 {
00341 Image<PixRGB<byte> > result(dims, NO_INIT);
00342 result.clear(back);
00343
00344 int smallhisto[16] = { 0 };
00345 for (int i = 0; i < 256; ++i)
00346 smallhisto[(i * 16) / 256] += this->histo[i];
00347
00348 int maxval = 1;
00349 for (size_t i = 0; i < 16; ++i)
00350 if (smallhisto[i] > maxval) maxval = smallhisto[i];
00351
00352 const int top = std::max(1, dims.h() - 10);
00353 for (size_t i = 0; i < 16; ++i)
00354 {
00355 const int left = (i * dims.w()) / 16;
00356 const int right =
00357 std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1);
00358
00359 for (int x = left; x <= right; ++x)
00360 {
00361 drawLine(result,
00362 Point2D<int>(x, dims.h() - 1),
00363 Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval),
00364 fore, 1);
00365 }
00366 }
00367
00368 return result;
00369 }
00370
00371 std::vector<SpeechToken> tokens;
00372 mutable int histo[256];
00373 mutable int lastReturnedToken;
00374 };
00375
00376 typedef std::map<const std::string, TokenType> TokenMap;
00377 TokenMap itsSpeechTokenMap;
00378 mutable pthread_mutex_t itsSpeechTokenMapMutex;
00379
00380 Stats itsStats;
00381
00382
00383 static bool readSpeechFile(TokenMap& tokenMap,
00384 const std::string& fname);
00385 static void saveSpeechFile(const TokenMap& tokenMap,
00386 const std::string& fname);
00387 };
00388
00389
00390
00391
00392
00393
00394
00395
00396 #endif // NEURO_NEOBRAIN_H_DEFINED