00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef NEURO_NEOBRAIN_H_DEFINED
00039 #define NEURO_NEOBRAIN_H_DEFINED
00040
00041 #include "Component/ModelComponent.H"
00042 #include "Component/ModelParam.H"
00043 #include "Devices/BeoHead.H"
00044 #include "Devices/SpeechSynth.H"
00045 #include "Image/ColorOps.H"
00046 #include "Image/DrawOps.H"
00047 #include "Image/CutPaste.H"
00048 #include "Image/Image.H"
00049 #include "Image/Pixels.H"
00050 #include "Image/OpenCVUtil.H"
00051 #include "Component/ParamMap.H"
00052 #include "Util/sformat.H"
00053 #include "rutz/mutex.h"
00054 #include "Util/WorkThreadServer.H"
00055 #include "Util/JobWithSemaphore.H"
00056
00057 #include <pthread.h>
00058 #include <string>
00059 #include <vector>
00060 #include <map>
00061
00062
00063 #define VSS_DEMO 1
00064
00065 class NeoBrain : public ModelComponent
00066 {
00067 public:
00068 enum State {CHECK_TARGET, TRACK_TARGET};
00069
00070
00071 NeoBrain(OptionManager& mgr,
00072 const std::string& descrName = "NeoBrain",
00073 const std::string& tagName = "NeoBrain");
00074
00075
00076 virtual ~NeoBrain();
00077
00078 void init(Dims imageDims, int nPoints = 1, int wz = 10);
00079
00080
00081 void setTarget(Point2D<int> loc, const Image<byte>& grey,
00082 int saliencyval=-1, bool changeState = true);
00083 void setTarget(Point2D<int> loc, const Image<PixRGB<byte> >& rgb,
00084 int saliencyval=-1, bool changeState = true)
00085 {
00086 if (itsAllowTracking.getVal())
00087 this->setTarget(loc, luminance(rgb), saliencyval, changeState);
00088 }
00089 Point2D<int> trackObject(const Image<byte>& grey);
00090 std::vector<Point2D<int> > getTrackersLoc(const Image<byte>& grey);
00091 Point2D<int> trackObject(const Image<PixRGB<byte> >& rgb)
00092 {
00093 if (itsAllowTracking.getVal())
00094 return this->trackObject(luminance(rgb));
00095 else return Point2D<int>(-1,-1);
00096 }
00097
00098
00099
00100 void saySaliencyVal(byte val);
00101
00102 void moveHeadToTarget();
00103
00104 void updateBoringness(const Image<byte>& salmap, byte foaval);
00105
00106 void updateExcitement(double vcxflicker);
00107
00108 float getBoringness() const;
00109
00110 float getExcitementLevel() const;
00111 float getSleepLevel() const;
00112
00113 bool sayText(const std::string& text, int priority, bool block = false) const;
00114
00115 std::string getToken(const std::string& token, int val) const;
00116
00117 bool sayToken(const std::string& token, int val, int priority) const;
00118
00119 bool sayObjectLabel(const std::string& label, int confidence, bool foceLabel=false);
00120
00121 struct Stats
00122 {
00123 unsigned long bigerrframes, bigerrframes_thresh;
00124 unsigned long targetframes, targetframes_thresh;
00125 unsigned long nomoveframes, nomoveframes_thresh;
00126 unsigned long stopframes, stopframes_thresh;
00127 float last_err, last_xerr, last_yerr, err_tol, dist_tol;
00128 float leftEyePanPos, leftEyeTiltPos;
00129 float rightEyePanPos, rightEyeTiltPos;
00130 float headPanPos, headTiltPos, headYawPos;
00131 };
00132
00133 Stats getStats() const { return itsStats; }
00134
00135 Image<PixRGB<byte> > getSaliencyHisto(const Dims& dims,
00136 const PixRGB<byte>& back,
00137 const PixRGB<byte>& fore)
00138 {
00139 if (!itsSpeakSaliency.getVal())
00140 {
00141 Image<PixRGB<byte> > result(dims, NO_INIT);
00142 result.clear(back);
00143 return result;
00144 }
00145
00146 TokenType ttype;
00147 {
00148 GVX_MUTEX_LOCK(&itsSpeechTokenMapMutex);
00149 TokenMap::const_iterator itr = itsSpeechTokenMap.find("new_target");
00150 if (itr == itsSpeechTokenMap.end())
00151 LERROR("couldn't find new_target token type");
00152 else
00153 ttype = (*itr).second;
00154 }
00155 return ttype.makeTokenHistogram(dims, back, fore);
00156 }
00157
00158
00159
00160 bool getUseHead() {return itsUseHead.getVal(); }
00161 bool getRelaxNeck() {return itsRelaxNeck.getVal(); }
00162 int getBoringnessThresh() {return itsBoringnessThresh.getVal(); }
00163 int getErrTolerance() {return itsErrTolerance.getVal(); }
00164 int getDistTolerance() {return itsDistTolerance.getVal(); }
00165
00166 long getTrackDelayFrames() { return itsTrackDelayFrames.getVal(); }
00167 long getBigErrFramesThresh() { return itsBigErrFramesThresh.getVal(); }
00168 long getTargetFramesThresh() { return itsTargetFramesThresh.getVal(); }
00169 long getNoMoveFramesThresh() { return itsNoMoveFramesThresh.getVal(); }
00170 long getStopTrackDelayFrames() { return itsStopTrackDelayFrames.getVal(); }
00171
00172
00173 void gotoSleep();
00174 void wakeUp();
00175 bool isSleeping() { return itsSleeping.getVal();}
00176
00177 void setUseHead(bool val) { itsUseHead.setVal(val); }
00178 void setRelaxNeck(bool val) { itsRelaxNeck.setVal(val); }
00179 void setBoringnessThresh(int val) { itsBoringnessThresh.setVal(val); }
00180 void setErrTolerance(int val) { itsErrTolerance.setVal(val); }
00181 void setDistTolerance(int val) { itsDistTolerance.setVal(val); }
00182
00183 void setTrackDelayFrames(long val) { itsTrackDelayFrames.setVal(val); }
00184 void setBigErrFramesThresh(long val) { itsBigErrFramesThresh.setVal(val); }
00185 void setTargetFramesThresh(long val) { itsTargetFramesThresh.setVal(val); }
00186 void setNoMoveFramesThresh(long val) { itsNoMoveFramesThresh.setVal(val); }
00187 void setStopTrackDelayFrames(long val) { itsStopTrackDelayFrames.setVal(val); }
00188 void setKeepTracking(bool val) { itsKeepTracking.setVal(val); }
00189
00190 void relaxHead(){itsBeoHead->relaxHead();}
00191 protected:
00192 virtual void start2();
00193
00194 void paramChanged(ModelParamBase* const param,
00195 const bool valueChanged,
00196 ParamClient::ChangeStatus* status);
00197
00198 private:
00199 void enterCheckTargetState();
00200
00201 OModelParam<bool> itsAllowTracking;
00202 OModelParam<bool> itsKeepTracking;
00203 NModelParam<bool> itsUseHead;
00204 NModelParam<bool> itsRelaxNeck;
00205 NModelParam<bool> itsSleeping;
00206 OModelParam<int> itsBoringnessThresh;
00207 NModelParam<unsigned int> itsErrTolerance;
00208 NModelParam<unsigned int> itsDistTolerance;
00209
00210 OModelParam<unsigned long> itsTrackDelayFrames;
00211 NModelParam<unsigned long> itsBigErrFramesThresh;
00212 NModelParam<unsigned long> itsTargetFramesThresh;
00213 NModelParam<unsigned long> itsNoMoveFramesThresh;
00214 OModelParam<int> itsStopTrackDelayFrames;
00215
00216 NModelParam<float> itsHeadInfoEyeTiltPos;
00217 NModelParam<float> itsHeadInfoEyePanPos;
00218 NModelParam<float> itsHeadInfoHeadPanPos;
00219
00220 OModelParam<bool> itsSpeakSaliency;
00221 OModelParam<bool> itsSpeakObjects;
00222 OModelParam<std::string> itsSpeechFile;
00223
00224 NModelParam<bool> itsRefreshSpeechFile;
00225
00226 NModelParam<float> itsExcitementThresh;
00227
00228 nub::soft_ref<BeoHead> itsBeoHead;
00229 nub::soft_ref<SpeechSynth> itsSpeechSynth;
00230
00231 unsigned long itsTargetFrames;
00232 unsigned long itsBigErrFrames;
00233 unsigned long itsNoMoveFrames;
00234 unsigned long itsStopFrames;
00235 unsigned long itsHeadInfoFrames;
00236
00237 int win_size;
00238
00239 #ifdef HAVE_OPENCV
00240 int MAX_COUNT;
00241 int count;
00242 CvPoint2D32f* points[2], *swap_points;
00243 IplImage *pyramid;
00244 IplImage *prev_pyramid;
00245 Image<byte> prev_grey;
00246 char* status;
00247 #endif
00248 int flags;
00249 State itsState;
00250 Dims itsImageDims;
00251 bool itsTracking;
00252
00253 float itsPrevTargetX;
00254 float itsPrevTargetY;
00255
00256 float itsBoringness;
00257 int itsBoringCount;
00258 float itsExcitementLevel;
00259 float itsSleep;
00260 int itsPrepSleep;
00261
00262 bool itsAlmostSinging;
00263
00264 std::string itsLastSpokenLabel;
00265
00266 struct SpeechToken
00267 {
00268 SpeechToken() : low(0), high(100), nspoken(0) {}
00269
00270 std::vector<std::string> textList;
00271 int low;
00272 int high;
00273 mutable int nspoken;
00274 };
00275
00276 struct TokenType
00277 {
00278 TokenType()
00279 :
00280 lastReturnedToken(0)
00281 {
00282 for (int i = 0; i < 256; ++i) this->histo[i] = 0;
00283 }
00284
00285 std::string getTextItemForVal(int val) const
00286 {
00287 if (val < 0)
00288 LERROR("val too low (%d)", val);
00289 else if (val > 255)
00290 LERROR("val too high (%d)", val);
00291
00292 this->histo[std::min(255, std::max(0,val))]++;
00293
00294 int percentile=0, total=0;
00295
00296 for (int i = 0; i < 256; ++i)
00297 {
00298 if (i <= val)
00299 percentile += this->histo[i];
00300 total += this->histo[i];
00301 }
00302
00303 const int fpercentile = int(100.0 * double(percentile) / double(total));
00304
00305 for (size_t i = 0; i < this->tokens.size(); ++i)
00306 {
00307 if (fpercentile >= this->tokens.at(i).low
00308 && fpercentile <= this->tokens.at(i).high)
00309 {
00310 int n = randomUpToNotIncluding
00311 (this->tokens.at(i).textList.size());
00312
00313 if (n == lastReturnedToken)
00314 n = (n+1) % this->tokens.at(i).textList.size();
00315
00316 if (n < 0)
00317 { LERROR("n too low (%d)", n); n = 0; }
00318 else if (n >= int(this->tokens.at(i).textList.size()))
00319 { LERROR("n too high (%d)", n); n = int(this->tokens.at(i).textList.size()); }
00320
00321 lastReturnedToken = n;
00322
00323 LDEBUG("fpercentile = %d; returning %s",
00324 fpercentile, this->tokens.at(i).textList.at(n).c_str());
00325
00326 this->tokens.at(i).nspoken++;
00327
00328 return this->tokens.at(i).textList.at(n);
00329 }
00330 }
00331
00332 return std::string();
00333 }
00334
00335 Image<PixRGB<byte> > makeTokenHistogram(const Dims& dims,
00336 const PixRGB<byte>& back,
00337 const PixRGB<byte>& fore)
00338 {
00339 Image<PixRGB<byte> > result(dims, NO_INIT);
00340 result.clear(back);
00341
00342 int smallhisto[16] = { 0 };
00343 for (int i = 0; i < 256; ++i)
00344 smallhisto[(i * 16) / 256] += this->histo[i];
00345
00346 int maxval = 1;
00347 for (size_t i = 0; i < 16; ++i)
00348 if (smallhisto[i] > maxval) maxval = smallhisto[i];
00349
00350 const int top = std::max(1, dims.h() - 10);
00351 for (size_t i = 0; i < 16; ++i)
00352 {
00353 const int left = (i * dims.w()) / 16;
00354 const int right =
00355 std::min(dims.w()-1, int(((i + 1) * dims.w()) / 16) - 1);
00356
00357 for (int x = left; x <= right; ++x)
00358 {
00359 drawLine(result,
00360 Point2D<int>(x, dims.h() - 1),
00361 Point2D<int>(x, dims.h() - (smallhisto[i] * top) / maxval),
00362 fore, 1);
00363 }
00364 }
00365
00366 return result;
00367 }
00368
00369 std::vector<SpeechToken> tokens;
00370 mutable int histo[256];
00371 mutable int lastReturnedToken;
00372 };
00373
00374 typedef std::map<const std::string, TokenType> TokenMap;
00375 TokenMap itsSpeechTokenMap;
00376 mutable pthread_mutex_t itsSpeechTokenMapMutex;
00377
00378 Stats itsStats;
00379
00380
00381 static bool readSpeechFile(TokenMap& tokenMap,
00382 const std::string& fname);
00383 static void saveSpeechFile(const TokenMap& tokenMap,
00384 const std::string& fname);
00385 };
00386
00387
00388
00389
00390
00391
00392
00393
00394 #endif // NEURO_NEOBRAIN_H_DEFINED