DPM.H

00001 /*!@file Features/DPM.H  deformable part models */
00002 //Code derived from http://www.cs.uchicago.edu/~pff/latent/
00003 //author = "Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D.",
00004 //title = "Discriminatively Trained Deformable Part Models, Release 4",
00005 //howpublished = "http://people.cs.uchicago.edu/~pff/latent-release4/"}
00006 
00007 // //////////////////////////////////////////////////////////////////// //
00008 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00009 // by the University of Southern California (USC) and the iLab at USC.  //
00010 // See http://iLab.usc.edu for information about this project.          //
00011 // //////////////////////////////////////////////////////////////////// //
00012 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00013 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00014 // in Visual Environments, and Applications'' by Christof Koch and      //
00015 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00016 // pending; application number 09/912,225 filed July 23, 2001; see      //
00017 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00018 // //////////////////////////////////////////////////////////////////// //
00019 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00020 //                                                                      //
00021 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00022 // redistribute it and/or modify it under the terms of the GNU General  //
00023 // Public License as published by the Free Software Foundation; either  //
00024 // version 2 of the License, or (at your option) any later version.     //
00025 //                                                                      //
00026 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00027 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00028 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00029 // PURPOSE.  See the GNU General Public License for more details.       //
00030 //                                                                      //
00031 // You should have received a copy of the GNU General Public License    //
00032 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00033 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00034 // Boston, MA 02111-1307 USA.                                           //
00035 // //////////////////////////////////////////////////////////////////// //
00036 //
00037 // Primary maintainer for this file: Lior Elazary
00038 // $HeadURL$
00039 // $Id$
00040 //
00041 
00042 #ifndef DPM_H_DEFINED
00043 #define DPM_H_DEFINED
00044 
00045 #include "Util/Types.H"
00046 #include "Image/Image.H"
00047 #include "Image/Pixels.H"
00048 #include "Image/ImageSet.H"
00049 #include "Features/HOG.H"
00050 #include "Util/JobServer.H"
00051 #include "Util/JobWithSemaphore.H"
00052 #include "Util/WorkThreadServer.H"
00053 
00054 #include <vector>
00055 #include <string>
00056 #include <stdio.h>
00057 
00058 class DPM
00059 {
00060 public:
00061 
00062 
00063   struct HOGFeatures
00064   {
00065     ImageSet<double> features;
00066     int bins;
00067     double scale;
00068   };
00069 
00070   struct ModelScore 
00071   {
00072     int level; //Level int the pyramid
00073     Image<double> score; //the max Score between components
00074     Image<int> component; //the comp that produced this score
00075 
00076     ModelScore(const Image<double> s, const Image<int> comp, int l) :
00077       level(l), score(s), component(comp)
00078     {
00079     }
00080   };
00081 
00082   struct Detection
00083   {
00084     Rectangle bb; //the bounding box
00085     double score; //the score
00086     int component; //the component that lead to this score
00087 
00088     Detection(const Rectangle rect, double s, int c) :
00089       bb(rect),
00090       score(s),
00091       component(c)
00092     {
00093     }
00094   };
00095 
00096 
00097   struct ModelPart
00098   {
00099     ImageSet<double> features;
00100     Point2D<float> anchor;
00101     float scale;  //The scale from which the anchor is set
00102     std::vector<double> deformation;
00103   };
00104 
00105   struct ModelComponent
00106   {
00107     ImageSet<double> rootFilter;
00108     double offset;
00109     std::vector<ModelPart> parts;
00110   };
00111 
00112   struct Model
00113   {
00114     std::vector<ModelComponent> components;
00115   };
00116   
00117   //! Constructor
00118   DPM();
00119 
00120   //! Destructor
00121   virtual ~DPM();
00122 
00123   void computeFeaturePyramid(const Image<PixRGB<byte> >& img);
00124   
00125   //! Load a model from a file
00126   void readModel(const char* fileName);
00127 
00128   //! Get an image showing the model
00129   Image<PixRGB<byte> > getModelImage();
00130 
00131   //! Convolve the whole model
00132   void convolveModel();
00133 
00134   //! Convolve just a component at a particular level in the pyramid
00135   Image<double> convolveComponent(const int comp, const int level);
00136   
00137   
00138   //! Get bounding boxes
00139   std::vector<Detection> getBoundingBoxes(const float thresh);
00140 
00141   //! Preform filtering using non maximum suppression
00142   std::vector<Detection> filterDetections(const std::vector<Detection>& detections, const float overlap);
00143   
00144   Image<double> convolveFeatures(const ImageSet<double>& imgFeatures, 
00145       const ImageSet<double>& filterFeatures);
00146  
00147   //! Distance transform for deformation
00148   Image<double> distanceTrans(const Image<double>& score,
00149       const std::vector<double>& deformation);
00150  
00151   //! Distance transform helper
00152   void dtHelper(const Image<double>::const_iterator src,
00153       Image<double>::iterator dst,
00154       Image<int>::iterator ptr,
00155       int step,
00156       int s1, int s2, int d1, int d2,
00157       double a, double b);
00158   
00159   
00160 protected:
00161   
00162   class DPMJob : public JobWithSemaphore
00163   {
00164     public:
00165       DPMJob(DPM* dpm, const int comp, const int l) :
00166         itsDPM(dpm), itsComponent(comp), itsLevel(l) 
00167     { }
00168 
00169       virtual ~DPMJob() { }
00170 
00171       virtual void run()
00172       {
00173         itsScore = itsDPM->convolveComponent(itsComponent, itsLevel);
00174         this->markFinished();
00175       }
00176 
00177       Image<double> getScore() { return itsScore; }
00178       int getComponent() { return itsComponent; }
00179 
00180       virtual const char* jobType() const { return "DPMJob"; }
00181 
00182     private:
00183       DPM* itsDPM;
00184       int itsComponent;
00185       int itsLevel;
00186       Image<double> itsScore;
00187   };
00188 
00189 
00190 private:
00191   std::vector<HOGFeatures> itsFeaturesPyramid;
00192   Model itsModel;
00193 
00194   int itsInterval; //the interval between features in the pyramid
00195 
00196   std::vector<ModelScore> itsModelScores; //Scores of the detections
00197 
00198   rutz::shared_ptr<WorkThreadServer> itsThreadServer;
00199   
00200 };
00201 
00202 
00203 // ######################################################################
00204 /* So things look consistent in everyone's emacs... */
00205 /* Local Variables: */
00206 /* indent-tabs-mode: nil */
00207 /* End: */
00208 
00209 #endif //