FfmpegDecoder.C

Go to the documentation of this file.
00001 /*!@file Media/FfmpegDecoder.C Low-level class for using ffmpeg to decode movie files */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Media/FfmpegDecoder.C $
00035 // $Id: FfmpegDecoder.C 12611 2010-01-20 22:49:23Z beobot $
00036 //
00037 
00038 #ifndef MEDIA_FFMPEGDECODER_C_DEFINED
00039 #define MEDIA_FFMPEGDECODER_C_DEFINED
00040 
00041 #ifdef INVT_HAVE_AVCODEC
00042 
00043 #include "Media/FfmpegDecoder.H"
00044 
00045 #include "Image/Image.H"
00046 #include "Image/Pixels.H"
00047 #include "Raster/GenericFrame.H"
00048 #include "Util/Assert.H"
00049 #include "Util/log.H"
00050 #include "Video/FfmpegFrame.H"
00051 #include "Video/VideoFrame.H"
00052 #include "rutz/trace.h"
00053 
00054 #include <cstdlib>
00055 #define attribute_deprecated
00056 
00057 extern "C"
00058 {
00059 //These directives are necessary to handle the various places in which
00060 //different versions of ffmpeg install their files. Unfortunately,
00061 //it looks like the cdeps program can't handle the more elegant
00062 //#if defined(XXX) #elif defined(XXX) #endif, so we have to do the following:
00063 
00064 #ifdef HAVE_LIBAVCODEC_AVCODEC_H
00065   #include <libavcodec/avcodec.h>
00066 #else
00067 #ifdef HAVE_FFMPEG_AVCODEC_H
00068   #include <ffmpeg/avcodec.h>
00069 #endif
00070 #endif
00071 
00072 #ifdef HAVE_LIBAVFORMAT_AVFORMAT_H
00073   #include <libavformat/avformat.h>
00074 #else
00075 #ifdef HAVE_FFMPEG_AVFORMAT_H
00076   #include <ffmpeg/avformat.h>
00077 #endif
00078 #endif
00079 }
00080 
00081 #include <sys/stat.h>
00082 #include <sys/types.h>
00083 #include <termios.h>
00084 
00085 namespace
00086 {
00087   bool isNonNegative(int v) { return v >= 0; }
00088   bool isNonNegative(unsigned int v) { return true; }
00089 
00090   template <class T>
00091   Image<T> vFlip(const Image<T>& src)
00092   {
00093   GVX_TRACE(__PRETTY_FUNCTION__);
00094 
00095     Image<T> result(src.getDims(), NO_INIT);
00096 
00097     const int w = src.getWidth();
00098     const int h = src.getHeight();
00099 
00100     const T* sptr = src.getArrayPtr();
00101     T* dptr = result.getArrayPtr() + (h-1)*w;
00102 
00103     for (int y = 0; y < h; ++y)
00104       {
00105         safecopy(dptr, sptr, w);
00106         sptr += w;
00107         dptr -= w;
00108       }
00109 
00110     return result;
00111   }
00112 
00113   AVCodec* findVideoCodec(const char* fname, AVInputFormat* iformat)
00114   {
00115     AVFormatContext* ic;
00116     int err = av_open_input_file(&ic, fname, iformat, 0, NULL);
00117     if (err < 0)
00118       LFATAL("Error opening input file %s: %d", fname, err);
00119 
00120     err = av_find_stream_info(ic);
00121     if (err < 0)
00122       LFATAL("Cannot determine stream codec parameters: %d", err);
00123 
00124     LINFO("Using input format '%s' (%s)",
00125           ic->iformat->name, ic->iformat->long_name);
00126 
00127     AVCodec* result = 0;
00128 
00129     // Prior to ffmpeg svn revision 7556, nb_streams was 'int', but then
00130     // changed to 'unsigned int'; to allow either type we will later
00131     // cast it to unsigned int, but first we want to check that it is
00132     // indeed non-negative:
00133     ASSERT(isNonNegative(ic->nb_streams));
00134 
00135     for (unsigned int i = 0; i < (unsigned int)(ic->nb_streams); ++i)
00136       {
00137 #ifdef INVT_FFMPEG_AVSTREAM_CODEC_IS_POINTER
00138         const AVCodecContext* const cc = ic->streams[i]->codec;
00139 #else
00140         const AVCodecContext* const cc = ic->streams[i]->codec;
00141 #endif
00142         if (cc->codec_type == CODEC_TYPE_VIDEO)
00143           {
00144             result = avcodec_find_decoder(cc->codec_id);
00145             if (result == 0)
00146               LFATAL("codec not found");
00147             break;
00148           }
00149       }
00150 
00151     av_close_input_file(ic);
00152 
00153     if (result == 0)
00154       LFATAL("Could not find a video stream in input file %s", fname);
00155 
00156     return result;
00157   }
00158 }
00159 
00160 // ######################################################################
00161 FfmpegDecoder::FfmpegDecoder(const char* codecname,
00162                              const int bufflen,
00163                              const char* fname,
00164                              const bool preload)
00165   :
00166   itsFile(NULL),
00167   itsContext(),
00168   itsPicture(),
00169   itsFrameNumber(0),
00170   itsBuf(),
00171   itsIdxStart(0),
00172   itsIdxEnd(0),
00173   itsInputEOF(false),
00174   itsDimsValid(false),
00175   itsNextFramePushback(false)
00176 {
00177   GVX_TRACE(__PRETTY_FUNCTION__);
00178 
00179   // no need to guard these functions for being called multiple times;
00180   // they all have internal guards
00181   av_register_all();
00182   avcodec_init();
00183   avcodec_register_all();
00184 
00185   AVInputFormat* iformat = NULL;
00186   if (strcmp(codecname, "List") == 0)
00187     {
00188       // list available codecs
00189       LINFO("##### Available input codecs (not all may work for video):");
00190       for(AVInputFormat* f = first_iformat; f != NULL; f = f->next)
00191         LINFO("%s: %s %d", f->name, f->long_name, f->flags);
00192       LFATAL("Please select a codec from this list");
00193     }
00194   else if (strcmp(codecname, "Auto") != 0)
00195     {
00196       // format is given
00197       iformat = av_find_input_format(codecname);
00198     }
00199 
00200   // ok, let's find a video stream:
00201   AVCodec* const codec = findVideoCodec(fname, iformat);
00202 
00203   ASSERT(codec != 0);
00204 
00205 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00206   avcodec_get_context_defaults(&itsContext);
00207 #else
00208   {
00209     AVCodecContext* const tmp = avcodec_alloc_context();
00210     memcpy(&itsContext, tmp, sizeof(AVCodecContext));
00211     free(tmp);
00212   }
00213 #endif
00214 
00215 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00216   avcodec_get_frame_defaults(&itsPicture);
00217 #else
00218   {
00219     AVFrame* tmp = avcodec_alloc_frame();
00220     memcpy(&itsPicture, tmp, sizeof(AVFrame));
00221     free(tmp);
00222   }
00223 #endif
00224 
00225   if (codec->capabilities & CODEC_CAP_TRUNCATED)
00226     itsContext.flags |= CODEC_FLAG_TRUNCATED;
00227 
00228   if (avcodec_open(&itsContext, codec) < 0)
00229     LFATAL("could not open codec\n");
00230 
00231   // open the stream:
00232   if (itsFile) fclose(itsFile);
00233   itsFile = fopen(fname, "rb");
00234   if (itsFile == NULL)
00235     LFATAL("could not open file! %s", fname);
00236 
00237   // get a read buffer:
00238   int blen;
00239   if (preload) // allocate a buffer for the entire movie
00240     {
00241       struct stat st;
00242       const int fd = fileno(itsFile);
00243       if (fd == -1) PLFATAL("Problem with fileno()");
00244       if (fstat(fd, &st) == -1) PLFATAL("Cannot stat %s", fname);
00245       blen = int(st.st_size);
00246     }
00247   else
00248     blen = bufflen; // allocate a buffer for a chunk of movie
00249 
00250   itsBuf.resize(blen);
00251   itsIdxStart = 0;
00252   itsIdxEnd = 0;
00253   itsFrameNumber = 0;
00254   itsInputEOF = false;
00255   itsDimsValid = false;
00256 
00257   LINFO("libavcodec build %d (%d.%d.%d)",
00258         int(LIBAVCODEC_BUILD),
00259         int((LIBAVCODEC_BUILD & 0xff0000) >> 16),
00260         int((LIBAVCODEC_BUILD & 0xff00) >> 8),
00261         int((LIBAVCODEC_BUILD & 0xff) >> 0));
00262 
00263   LINFO("libavformat build %d (%d.%d.%d)",
00264         int(LIBAVFORMAT_BUILD),
00265         int((LIBAVFORMAT_BUILD & 0xff0000) >> 16),
00266         int((LIBAVFORMAT_BUILD & 0xff00) >> 8),
00267         int((LIBAVFORMAT_BUILD & 0xff) >> 0));
00268 
00269   char buf[512];
00270   avcodec_string(&buf[0], sizeof(buf), &itsContext, /*encode=*/ 0);
00271   buf[sizeof(buf)-1] = '\0';
00272   LINFO("%s [%s]", fname, &buf[0]);
00273 
00274   // if preload, let's load up the entire movie now and close itsFile:
00275   if (preload)
00276     {
00277       const int size = fread(&itsBuf[0], 1, itsBuf.size(), itsFile);
00278       if (size <= 0) PLFATAL("Read error");
00279       itsIdxEnd = size_t(size);
00280       // close the stream since we have all the data already:
00281       fclose(itsFile);
00282       itsFile = NULL;
00283       LINFO("pre-loaded %s", fname);
00284     }
00285 }
00286 
00287 // ######################################################################
00288 FfmpegDecoder::~FfmpegDecoder()
00289 {
00290   GVX_TRACE(__PRETTY_FUNCTION__);
00291 
00292   if (itsFile) { fclose(itsFile); }
00293   avcodec_close(&itsContext);
00294 }
00295 
00296 // ######################################################################
00297 int FfmpegDecoder::apparentFrameNumber() const
00298 {
00299   GVX_TRACE(__PRETTY_FUNCTION__);
00300 
00301   return
00302     itsNextFramePushback
00303     ? itsFrameNumber - 1
00304     : itsFrameNumber;
00305 }
00306 
00307 // ######################################################################
00308 GenericFrameSpec FfmpegDecoder::peekFrameSpec()
00309 {
00310   GVX_TRACE(__PRETTY_FUNCTION__);
00311 
00312   if (!itsDimsValid)
00313     {
00314       // if we've already peeked at the next frame, then the dims
00315       // should have already be valid:
00316       ASSERT(!itsNextFramePushback);
00317 
00318       readRawFrame();
00319       itsNextFramePushback = true;
00320     }
00321 
00322   ASSERT(itsDimsValid);
00323 
00324   GenericFrameSpec result;
00325 
00326   result.nativeType = GenericFrame::VIDEO;
00327   result.videoFormat =
00328     convertAVPixelFormatToVideoFormat(itsContext.pix_fmt);
00329   result.videoByteSwap = false;
00330   result.dims = Dims(itsContext.width, itsContext.height);
00331   result.floatFlags = 0;
00332 
00333 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4754) // SVN rev >= 4168
00334   result.frameRate = static_cast<float>(1/av_q2d(itsContext.time_base)) ;
00335 #else // assume FFmpeg libavcodec build 4753 or earlier (i.e., SVN rev <= 4161)
00336   result.frameRate = itsContext.frame_rate ;
00337 #endif
00338 
00339   return result;
00340 }
00341 
00342 // ######################################################################
00343 VideoFrame FfmpegDecoder::readVideoFrame()
00344 {
00345   // note that we need to force the peekFrameSpec() call to occur
00346   // before the convertAVFrameToVideoFrame() call, so that
00347   // itsContext.{width,height} are properly initialized
00348   const GenericFrameSpec spec = this->peekFrameSpec();
00349 
00350   return convertAVFrameToVideoFrame(this->readRawFrame(),
00351                                     itsContext.pix_fmt,
00352                                     spec.dims);
00353 }
00354 
00355 // ######################################################################
00356 Image<PixRGB<byte> > FfmpegDecoder::readRGB()
00357 {
00358   // note that we need to force the peekFrameSpec() call to occur
00359   // before the convertAVFrameToRGB() call, so that
00360   // itsContext.{width,height} are properly initialized
00361   const GenericFrameSpec spec = this->peekFrameSpec();
00362 
00363   return convertAVFrameToRGB(this->readRawFrame(),
00364                              itsContext.pix_fmt,
00365                              spec.dims);
00366 }
00367 
00368 // ######################################################################
00369 bool FfmpegDecoder::readAndDiscardFrame()
00370 {
00371   return (readRawFrame() != 0);
00372 }
00373 
00374 // ######################################################################
00375 const AVFrame* FfmpegDecoder::readRawFrame()
00376 {
00377   GVX_TRACE(__PRETTY_FUNCTION__);
00378 
00379   if (itsNextFramePushback)
00380     {
00381       itsNextFramePushback = false;
00382       return &itsPicture;
00383     }
00384 
00385   if (itsInputEOF) return NULL;  // we have reached end of file already
00386 
00387   int nlen0 = 0;
00388 
00389   while (true)
00390     {
00391       bool goteof = false;
00392 
00393       ASSERT(itsIdxEnd >= itsIdxStart);
00394 
00395       // do we need to read more data from file?
00396       if (itsIdxEnd < itsIdxStart + 16384)
00397         {
00398           const size_t size = this->refillBuffer();
00399           if (size == 0) goteof = true; // end of file
00400         }
00401 
00402       LDEBUG("buffer range = %"ZU" - %"ZU" of %"ZU", goteof=%d",
00403              itsIdxStart, itsIdxEnd, itsBuf.size(), int(goteof));
00404 
00405       // decode contents of our read buffer if any:
00406       int gotpic = 0;
00407       const int len =
00408         avcodec_decode_video(&itsContext, &itsPicture, &gotpic,
00409                              &itsBuf[itsIdxStart],
00410                              itsIdxEnd - itsIdxStart);
00411 
00412       if (len == 0) ++nlen0;
00413       else nlen0 = 0;
00414 
00415       LDEBUG("end-start=%"ZU", len=%d, nlen0=%d, gotpic=%d",
00416              itsIdxEnd-itsIdxStart, len, nlen0, gotpic);
00417 
00418       if (len < 0)
00419         LFATAL("Error while decoding frame %d", itsFrameNumber);
00420       else if (size_t(len) > (itsIdxEnd - itsIdxStart))
00421         {
00422           const size_t minsize =
00423             std::max(4*size_t(len)+4096,
00424                      4*(itsIdxEnd-itsIdxStart)+4096);
00425 
00426           if (minsize > itsBuf.size())
00427             // ok, libavcodec needs a bigger buffer in order to be
00428             // able to hold a full frame, so let's do that now:
00429             itsBuf.resize(minsize);
00430 
00431           // ok, the decoder wants more data...
00432           const size_t size = this->refillBuffer();
00433           if (size == 0)
00434             LFATAL("libavcodec wanted more data, but we are at eof");
00435         }
00436       else
00437         {
00438           itsIdxStart += len;
00439           if ((itsIdxStart == itsIdxEnd) && goteof && (gotpic || nlen0 >= 2))
00440             itsInputEOF = true; // decoded last frame
00441           if ((itsIdxStart == itsIdxEnd) && goteof && (nlen0 >= 2))
00442             return NULL;
00443           if (gotpic)
00444             {
00445               ++itsFrameNumber;
00446               itsDimsValid = true;
00447               return &itsPicture;
00448             }
00449         }
00450     }
00451 }
00452 
00453 // ######################################################################
00454 size_t FfmpegDecoder::refillBuffer()
00455 {
00456   // let's move the data we have up to the front of the buffer, and
00457   // then fill the buffer again
00458   const size_t nsave = itsIdxEnd - itsIdxStart;
00459   ASSERT(itsBuf.size() > nsave);
00460 
00461   if (nsave > 0)
00462     memmove(&itsBuf[0], &itsBuf[itsIdxStart], nsave);
00463   itsIdxStart = 0;
00464   itsIdxEnd = nsave;
00465 
00466   // if our file is already closed (e.g. if we preloaded the entire
00467   // movie), then we can't read any more data, so just return 0:
00468   if (itsFile == 0)
00469     return 0;
00470 
00471   const int size = fread(&itsBuf[0] + nsave, 1,
00472                          itsBuf.size() - nsave, itsFile);
00473   if (size < 0)
00474     PLFATAL("read error");
00475   itsIdxEnd += size;
00476   return size_t(size);
00477 }
00478 
00479 #endif // INVT_HAVE_AVCODEC
00480 
00481 // ######################################################################
00482 /* So things look consistent in everyone's emacs... */
00483 /* Local Variables: */
00484 /* mode: c++ */
00485 /* indent-tabs-mode: nil */
00486 /* End: */
00487 
00488 #endif // MEDIA_FFMPEGDECODER_C_DEFINED