00001 /*!@file MBARI/VisualEvent.H classes useful for event tracking */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2003 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Dirk Walther <walther@caltech.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/MBARI/VisualEvent.H $ 00035 // $Id: VisualEvent.H 9412 2008-03-10 23:10:15Z farhan $ 00036 // 00037 00038 #ifndef VISUALEVENT_H_DEFINED 00039 #define VISUALEVENT_H_DEFINED 00040 00041 #include "Image/KalmanFilter.H" 00042 #include "Image/Point2D.H" 00043 #include "Image/Rectangle.H" 00044 #include "MBARI/BitObject.H" 00045 #include "Util/Assert.H" 00046 #include "Util/Types.H" 00047 00048 #include <list> 00049 #include <string> 00050 #include <vector> 00051 00052 template <class T> class Image; 00053 template <class T> class PixRGB; 00054 class Vector2D; 00055 class StraighLine2D; 00056 00057 // ###################################################################### 00058 //! public class that contains information for a visual token for tracking 00059 class Token 00060 { 00061 public: 00062 00063 //!default constructor 00064 Token(); 00065 00066 //!constructor that fills the members with initial values 00067 Token (float x, float y, uint frame, BitObject bo); 00068 00069 //!constructor with the location being the centroid of the BitObject 00070 Token (BitObject bo, uint frame); 00071 00072 //!read the Token from the input stream is 00073 Token (std::istream& is); 00074 00075 //!the BitObject that holds info about the shape of this Token 00076 BitObject bitObject; 00077 00078 //!the x and y coordinates of the token in the frame 00079 Vector2D location; 00080 00081 //!the expected location for this token 00082 Vector2D prediction; 00083 00084 //!the straight line on which this token is moving 00085 StraightLine2D line; 00086 00087 //!the angle between line and the connection to the FOE 00088 float angle; 00089 00090 //!the frame number at which the token occurred 00091 uint frame_nr; 00092 00093 //! write the entire Token to the output stream os 00094 void writeToStream(std::ostream& os) const; 00095 00096 //! read the Token from the input stream is 00097 void readFromStream(std::istream& is); 00098 00099 //! write the Token's position to the output streeam os 00100 void writePosition(std::ostream& os) const; 00101 00102 }; 00103 00104 // ###################################################################### 00105 //! class that contains a set of property vectors for events 00106 /*! each property vector contains the following fields:<br> 00107 <table><tr><td><b>index</b><td><b>content</b></th> 00108 <tr><td>0<td>event number 00109 <tr><td>1<td>interestingness index 00110 <tr><td>2<td>area 00111 <tr><td>3<td>Uxx 00112 <tr><td>4<td>Uyy 00113 <tr><td>5<td>Uxy 00114 <tr><td>6<td>major Axis 00115 <tr><td>7<td>minor Axis 00116 <tr><td>8<td>elongation 00117 <tr><td>9<td>orientation angle 00118 <tr><td>10<td>maximum intensity 00119 <tr><td>11<td>minimum intensity 00120 <tr><td>12<td>average intensity</table>*/ 00121 class PropertyVectorSet 00122 { 00123 public: 00124 00125 //! default contructor 00126 PropertyVectorSet(); 00127 00128 //! read from stream constructor 00129 PropertyVectorSet(std::istream& is); 00130 00131 //! write the entire PropertyVectorSet to the output stream os 00132 void writeToStream(std::ostream& os) const; 00133 00134 //! read the PropertyVectorSet from the input stream is 00135 void readFromStream(std::istream& is); 00136 00137 //! return the PropertyVector for an event by its number 00138 std::vector<float> getPropertyVectorForEvent(const int num); 00139 00140 //! the vector of property vectors 00141 std::vector< std::vector<float> > itsVectors; 00142 }; 00143 00144 00145 // ###################################################################### 00146 //!stores a visual event as a sequence of tokens with additional information 00147 class VisualEvent 00148 { 00149 public: 00150 00151 //! constructor 00152 /*!@param tk the first token for this event*/ 00153 VisualEvent(Token tk, int maxDist); 00154 00155 //! read the VisualEvent from the input stream is 00156 VisualEvent(std::istream& is); 00157 00158 //! write the entire VisualEvent to the output stream os 00159 void writeToStream(std::ostream& os) const; 00160 00161 //! read the VisualEvent from the input stream is 00162 void readFromStream(std::istream& is); 00163 00164 //! write all the positions for this event to the output stream os 00165 void writePositions(std::ostream& os) const; 00166 00167 //! get the prediction for the location of the next token 00168 Point2D<int> predictedLocation() const; 00169 00170 //! is this token allowed as the next one? 00171 bool isTokenOk(const Token& tk) const; 00172 00173 //! returns the cost of associating tk with this event 00174 /*!@return returns -1.0F if the token is not valid for this event*/ 00175 float getCost(const Token& tk) const; 00176 00177 //! assign tk to this event, use foe as the focus of expansion 00178 void assign(const Token& tk, const Vector2D& foe); 00179 00180 //! if the BitObject intersects with the one for this event at frameNum 00181 bool doesIntersect(const BitObject& obj, int frameNum) const; 00182 00183 //! mark this event as "closed" 00184 inline void close(); 00185 00186 //! whether this event is closed 00187 inline bool isClosed(); 00188 00189 //! return the event identification number of this event 00190 inline uint getEventNum() const; 00191 00192 //! return the frame number of the first token 00193 inline uint getStartFrame() const; 00194 00195 //! return the frame number of the last token 00196 inline uint getEndFrame() const; 00197 00198 //! return the number of frames that this event spans 00199 inline uint getNumberOfFrames() const; 00200 00201 //! return the maximum object size of any of the tokens 00202 inline int getMaxSize() const; 00203 00204 //! return the token that has the maximum object size 00205 inline Token getMaxSizeToken() const; 00206 00207 //!return a token based on a frame number 00208 inline Token getToken(const uint frame_num) const; 00209 00210 //!return whether frame_num is between start frame and end frame 00211 inline bool isFrameOk(const uint frame_num) const; 00212 00213 //! return a vector that contains a bunch of numbers that describe the event 00214 /*! see source code for details */ 00215 std::vector<float> getPropertyVector(); 00216 00217 //! returns the maximum dimensions of the tracked object in any of the frames 00218 Dims getMaxObjectDims() const; 00219 00220 private: 00221 static uint counter; 00222 uint myNum; 00223 std::vector<Token> tokens; 00224 uint startframe, endframe; 00225 int max_size; 00226 uint maxsize_framenr; 00227 bool closed; 00228 int itsMaxDist; 00229 KalmanFilter xTracker, yTracker; 00230 }; 00231 00232 // ###################################################################### 00233 // ######## VisualEventSet 00234 // ###################################################################### 00235 //! contains a set of visual events to which a set of tokens can be matched 00236 class VisualEventSet 00237 { 00238 public: 00239 00240 //! constructor 00241 /*!@param maxDist the maximum distance that a token can have from an 00242 event and still be considered for becoming part of the event 00243 @param minFrameNum the minimum number of frames for which an event needs 00244 be present 00245 @param minSize the minimum size (in pixels) for an "interesting" event 00246 @param fileName the fileName for saving intermediate result*/ 00247 VisualEventSet(const int maxDist, 00248 const uint minFrameNum, 00249 const int minSize, 00250 const std::string& filename); 00251 00252 //! read the VisualEventSet from the input stream is 00253 VisualEventSet(std::istream& is); 00254 00255 //! write the entire VisualEventSet to the output stream os 00256 void writeToStream(std::ostream& os) const; 00257 00258 //! read the VisualEventSet from the input stream is 00259 void readFromStream(std::istream& is); 00260 00261 //! write th positions of all events to the output stream os 00262 void writePositions(std::ostream& os) const; 00263 00264 //! extract BitObjects from binMap and assign them to the events 00265 /*!@param curFOE the current focus of expansion for detecting unusual 00266 motion*/ 00267 void updateEvents(const Image<byte>& binMap, const Vector2D& curFOE, 00268 int frameNum); 00269 00270 //! initiate new events for all BitObjects in bos if they aren't tracked yet 00271 void initiateEvents(std::list<BitObject>& bos, int frameNum); 00272 00273 //! if obj intersects with any of the event at frameNum 00274 bool doesIntersect(const BitObject& obj, int frameNum) const; 00275 00276 //! update the focus of expansion 00277 //Vector2D updateFOE(); 00278 00279 //! returns the current focus of expansion for frameNum 00280 Vector2D getFOE(int frameNum) const; 00281 00282 //! return the number of stored events 00283 uint numEvents() const; 00284 00285 //! delete all stored events 00286 void reset(); 00287 00288 //! clean up the event list - erase all unsuccessful candidates 00289 /*!@param currFrame - the current frame number in processing 00290 @param maxFrameSkip how many frames may be skipped using interpolation 00291 (default: 1)*/ 00292 void cleanUp(uint currFrame, uint maxFrameSkip = 1); 00293 00294 //! close all events (for clean-up at the end) 00295 void closeAll(); 00296 00297 //! returns a set of all tokens stored at a particular frame number 00298 std::vector<Token> getTokens(uint frameNum); 00299 00300 //! draw the tokens for a particular frame number into an image 00301 /*!@param img the image to draw on 00302 @param frameNum the frame number for which to draw the tokens 00303 @param circleRadius the radius of the circle used to mark the tokens 00304 @param colorInteresting the color for "interesting" tokens 00305 @param colorCandidate the color for unsuccessful candidate tokens 00306 @param colorPred the colot for event location predictions 00307 @param showEventLabels whether to write the event labels into the image*/ 00308 void drawTokens(Image< PixRGB<byte> >& img, 00309 uint frameNum, 00310 PropertyVectorSet& pvs, 00311 int circleRadius, 00312 BitObjectDrawMode mode, 00313 float opacity, 00314 PixRGB<byte> colorInteresting, 00315 PixRGB<byte> colorCandidate, 00316 PixRGB<byte> colorPred, 00317 PixRGB<byte> colorFOE, 00318 bool showEventLabels); 00319 00320 //! returns a PropertyVectorSet for this VisualEventSet 00321 PropertyVectorSet getPropertyVectorSet(); 00322 00323 //! return the latest frame number before currFrame for which events have been closed already 00324 int getAllClosedFrameNum(uint currFrame); 00325 00326 //! whether event is "interesting" 00327 bool isEventInteresting(std::vector<float> propVec) const; 00328 00329 //! test whether the event with eventNum exists in this set 00330 bool doesEventExist(uint eventNum) const; 00331 00332 //! Returns the event with number eventNum 00333 VisualEvent getEventByNumber(uint eventNum) const; 00334 00335 protected: 00336 //! Returns a vector with iterators pointing to all events at framenum 00337 std::vector<std::list<VisualEvent>::iterator> getEventsForFrame(uint framenum); 00338 00339 private: 00340 // compute the right position for a text label 00341 Point2D<int> getLabelPosition(Dims imgDims,Rectangle bbox, 00342 Dims textDims) const; 00343 std::list<VisualEvent> itsEvents; 00344 int itsMaxDist; 00345 float itsMaxCost; 00346 uint itsMinFrameNum; 00347 int itsMinSize; 00348 int startframe, endframe; 00349 std::string itsFileName; 00350 std::vector<Vector2D> itsFOE; 00351 //Vector2D sumFOE; 00352 //int numFOE; 00353 KalmanFilter xFOE, yFOE; 00354 }; 00355 00356 00357 00358 // ###################################################################### 00359 // ########### INLINED METHODS 00360 // ###################################################################### 00361 inline void VisualEvent::close() 00362 { closed = true; } 00363 00364 // ###################################################################### 00365 inline bool VisualEvent::isClosed() 00366 { return closed; } 00367 00368 // ###################################################################### 00369 inline uint VisualEvent::getEventNum() const 00370 { return myNum; } 00371 00372 // ###################################################################### 00373 inline uint VisualEvent::getStartFrame() const 00374 { return startframe; } 00375 00376 // ###################################################################### 00377 inline uint VisualEvent::getEndFrame() const 00378 { return endframe; } 00379 00380 // ###################################################################### 00381 inline uint VisualEvent::getNumberOfFrames() const 00382 { return (endframe - startframe + 1); } 00383 00384 // ###################################################################### 00385 inline int VisualEvent::getMaxSize() const 00386 { return max_size; } 00387 00388 // ###################################################################### 00389 inline Token VisualEvent::getMaxSizeToken() const 00390 { return getToken(maxsize_framenr); } 00391 00392 // ###################################################################### 00393 inline Token VisualEvent::getToken(uint frame_num) const 00394 { 00395 ASSERT (isFrameOk(frame_num)); 00396 return tokens[frame_num - startframe]; 00397 } 00398 00399 // ###################################################################### 00400 inline bool VisualEvent::isFrameOk(const uint frame_num) const 00401 { 00402 return ((frame_num >= startframe) && (frame_num <= endframe)); 00403 } 00404 00405 #endif 00406 00407 // ###################################################################### 00408 /* So things look consistent in everyone's emacs... */ 00409 /* Local Variables: */ 00410 /* indent-tabs-mode: nil */ 00411 /* End: */