FOEestimator.C

Go to the documentation of this file.
00001 /*!@file MBARI/FOEestimator.C a class for estimating the focus of expansion
00002   in a video
00003  */
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2002   //
00006 // by the University of Southern California (USC) and the iLab at USC.  //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Dirk Walther <walther@caltech.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/MBARI/FOEestimator.C $
00036 // $Id: FOEestimator.C 7293 2006-10-20 18:49:55Z rjpeters $
00037 //
00038 
00039 #include "MBARI/FOEestimator.H"
00040 
00041 #include "Image/CutPaste.H"  // for crop()
00042 #include "Image/ImageSet.H"
00043 #include "Image/PyramidOps.H"
00044 
00045 #include <vector>
00046 
00047 // ######################################################################
00048 FOEestimator::FOEestimator(int numAvg, int pyramidLevel)
00049   : itsPyrLevel(pyramidLevel),
00050     itsFrames(3),
00051     itsXvectors(numAvg),
00052     itsYvectors(numAvg)
00053 { }
00054 
00055 // ######################################################################
00056 Vector2D FOEestimator::updateFOE(const Image<byte>& img,
00057                                  const Rectangle region)
00058 {
00059   return (updateFOE(crop(img,region)) +
00060           Vector2D(region.left(),region.top()));
00061 }
00062 
00063 // ######################################################################
00064 Vector2D FOEestimator::updateFOE(const Image<byte>& img)
00065 {
00066   // subsample the image to the desired level
00067   ImageSet<byte> iset = buildPyrGaussian(img,0,itsPyrLevel+1,3);
00068   itsFrames.push_back(iset[itsPyrLevel]);
00069 
00070   // need at least three frames to compute optical flow
00071   if (itsFrames.size() < 3) return Vector2D();
00072 
00073   int w = itsFrames.back().getWidth();
00074   int h = itsFrames.back().getHeight();
00075 
00076   // get the iterators ready to step through the three images
00077   // at times (t-1), t, and (t+1)
00078   Image<byte>::const_iterator it,tm,tp,xm,xp,ym,yp;
00079   it = itsFrames[1].begin();
00080   xm = it + w;
00081   xp = it + w + 2;
00082   ym = it + 1;
00083   yp = it + w + w + 1;
00084   tm = itsFrames[0].begin() + w + 1;
00085   tp = itsFrames[2].begin() + w + 1;
00086 
00087   // prepare the vectors for the results
00088   Image<float> vx(w-2,1,ZEROS);
00089   Image<float> vy(h-2,1,ZEROS);
00090   std::vector<int> cx(w-2,0), cy(h-2,0);
00091 
00092   // compute optical flow and sum up the x components over y, and the
00093   // y components over x
00094   for (int y = 0; y < h-2; ++y)
00095     {
00096       for (int x = 0; x < w-2; ++x)
00097         {
00098           // x component of optical flow
00099           if (*xp != *xm)
00100             {
00101               vx[x] += (float(*tm)-float(*tp)) / (float(*xp) - float(*xm));
00102               cx[x]++;
00103             }
00104 
00105           // y component of optical flow
00106           if (*yp != *ym)
00107             {
00108               vy[y] += (float(*tm)-float(*tp)) / (float(*yp) - float(*ym));
00109               cy[y]++;
00110             }
00111           ++tm; ++tp; ++xm; ++xp; ++ym; ++yp;
00112         }
00113 
00114       // divide the y guys by the number of contributions
00115       if (cy[y] > 0) vy[y] /= cy[y];
00116 
00117       // offset to jump to the next line
00118       tm += 2; tp += 2;
00119       xm += 2; xp += 2;
00120       ym += 2; yp += 2;
00121     }
00122 
00123   // divide the x guys by the number of contributions
00124   for (int x = 0; x < w-2; ++x)
00125       if (cx[x] > 0) vx[x] /= cx[x];
00126 
00127   // store the vectors in the cache to generate averages
00128   itsXvectors.push_back(vx);
00129   itsYvectors.push_back(vy);
00130 
00131   // now do the linear regression for the x and y directions over the mean
00132   float x0 = getZeroCrossing(itsXvectors.mean());
00133   float y0 = getZeroCrossing(itsYvectors.mean());
00134 
00135   // need to scale the coordinates according to the subsampling done
00136   float pw2 = pow(2,itsPyrLevel);
00137   itsFOE.reset(pw2*(x0+1),pw2*(y0+1));
00138 
00139   return itsFOE;
00140 }
00141 
00142 
00143 
00144 // ######################################################################
00145 float FOEestimator::getZeroCrossing(const Image<float>& vec)
00146 {
00147   float N = vec.getWidth();
00148   float sx = N * (N - 1) / 2;
00149   float sxx = N * (N - 1) * (2*N - 1) / 6;
00150   float sy = 0.0F, sxy = 0.0F;
00151   for (int i = 0; i < N; ++i)
00152     {
00153       sy += vec[i];
00154       sxy += i*vec[i];
00155     }
00156   return (sx * sxy - sxx * sy) / (N * sxy - sx * sy);
00157 }
00158 
00159 
00160 // ######################################################################
00161 Vector2D FOEestimator::getFOE()
00162 {
00163   return itsFOE;
00164 }
00165 
00166 
00167 // ######################################################################
00168 /* So things look consistent in everyone's emacs... */
00169 /* Local Variables: */
00170 /* indent-tabs-mode: nil */
00171 /* End: */