FfmpegPacketDecoder.C

Go to the documentation of this file.
00001 /*!@file Media/FfmpegPacketDecoder.C */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Media/FfmpegPacketDecoder.C $
00035 // $Id: FfmpegPacketDecoder.C 12611 2010-01-20 22:49:23Z beobot $
00036 //
00037 
00038 // Some code in this file is based on ffplay from the ffmpeg
00039 // distribution, with this original copyright notice:
00040 
00041 /*
00042  * FFplay : Simple Media Player based on the ffmpeg libraries
00043  * Copyright (c) 2003 Fabrice Bellard
00044  *
00045  * This library is free software; you can redistribute it and/or
00046  * modify it under the terms of the GNU Lesser General Public
00047  * License as published by the Free Software Foundation; either
00048  * version 2 of the License, or (at your option) any later version.
00049  *
00050  * This library is distributed in the hope that it will be useful,
00051  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00052  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00053  * Lesser General Public License for more details.
00054  *
00055  * You should have received a copy of the GNU Lesser General Public
00056  * License along with this library; if not, write to the Free Software
00057  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00058  */
00059 
00060 #ifndef MEDIA_FFMPEGPACKETDECODER_C_DEFINED
00061 #define MEDIA_FFMPEGPACKETDECODER_C_DEFINED
00062 
00063 #ifdef INVT_HAVE_AVCODEC
00064 
00065 #include "Media/FfmpegPacketDecoder.H"
00066 
00067 #include "Raster/GenericFrame.H"
00068 #include "Util/Assert.H"
00069 #include "Util/log.H"
00070 #include "Video/FfmpegFrame.H"
00071 
00072 #include <cerrno>
00073 
00074 // see http://dranger.com/ffmpeg/ffmpegtutorial_all.txt for useful info
00075 
00076 namespace
00077 {
00078   // ######################################################################
00079   static int& eof_reached(AVFormatContext* c)
00080   {
00081     // AVFormatContext's pb member changed from a 'ByteIOContext' to a
00082     // 'ByteIOContext*' with ffmpeg svn rev 11071; see this thread:
00083     // http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2007-November/037797.html
00084 #if defined (LIBAVFORMAT_BUILD) && (LIBAVFORMAT_BUILD >= ((52<<16)+(0<<8)+0))
00085     ASSERT(c->pb != 0);
00086     return c->pb->eof_reached;
00087 #else
00088     return c->pb.eof_reached;
00089 #endif
00090   }
00091 
00092   // ######################################################################
00093   const char* get_error_msg(int err)
00094   {
00095     switch(err)
00096       {
00097       case AVERROR_NUMEXPECTED:
00098         return
00099           "Incorrect image filename syntax.\n"
00100           "Use '%%d' to specify the image number:\n"
00101           "  for img1.jpg, img2.jpg, ..., use 'img%%d.jpg';\n"
00102           "  for img001.jpg, img002.jpg, ..., use 'img%%03d.jpg'.";
00103 
00104       case AVERROR_INVALIDDATA:
00105         return "Error while parsing header";
00106 
00107       case AVERROR_NOFMT:
00108         return "Unknown format";
00109 
00110       case AVERROR_IO:
00111         return
00112           "I/O error occured\n"
00113           "Usually that means that input file is truncated and/or corrupted.";
00114 
00115       case AVERROR_NOMEM:
00116         return "memory allocation error";
00117       }
00118 
00119     // default:
00120     return "Error while opening file";
00121   }
00122 
00123   bool isNonNegative(int v) { return v >= 0; }
00124   bool isNonNegative(unsigned int v) { return true; }
00125 }
00126 
00127 // ######################################################################
00128 FfmpegPacketDecoder::FfmpegPacketDecoder(const char* fname,
00129                                          const bool preload)
00130   :
00131   itsFilename(fname),
00132   itsFormatContext(0),
00133   itsCodecContext(0),
00134   itsStreamID(-1),
00135   itsFrameNumber(0),
00136   itsDimsValid(false),
00137   itsNextFramePushback(false),
00138   itsPacketsExhausted(false),
00139   itsPacketQ()
00140 {
00141   // no need to guard these functions for being called multiple times;
00142   // they all have internal guards
00143   av_register_all();
00144   avcodec_init();
00145   avcodec_register_all();
00146 
00147   AVFormatParameters params;
00148   memset(&params, 0, sizeof(params));
00149 #if defined (LIBAVFORMAT_BUILD) && (LIBAVFORMAT_BUILD <= ((50<<16)+(6<<8)))
00150   // AVImageFormat* disappeared from ffmpeg mainline on 2006-11-02;
00151   // last version with it was 50.6.0 and first version without it was
00152   // 51.6.0
00153   params.image_format = 0;
00154 #endif
00155 #if defined (LIBAVFORMAT_BUILD) && (LIBAVFORMAT_BUILD >= 4610)
00156   params.initial_pause = 1; /* we force a pause when starting an RTSP
00157                             stream */
00158 #endif
00159 
00160   int err = av_open_input_file(&itsFormatContext, itsFilename.c_str(),
00161                                0, 0, &params);
00162   if (err < 0)
00163     {
00164       LFATAL("%s: %s %i", itsFilename.c_str(), get_error_msg(err), err);
00165     }
00166 
00167   err = av_find_stream_info(itsFormatContext);
00168   if (err < 0)
00169     {
00170       LFATAL("%s: could not find codec parameters\n",
00171              itsFilename.c_str());
00172     }
00173   eof_reached(itsFormatContext) = 0; //FIXME hack, ffplay maybe shouldnt use url_feof() to test for the end
00174 
00175   /* now we can begin to play (RTSP stream only) */
00176 #if defined (LIBAVFORMAT_BUILD) && (LIBAVFORMAT_BUILD >= 4610)
00177   av_read_play(itsFormatContext);
00178 #endif
00179 
00180   LDEBUG("%s: %d streams in file", fname,
00181          itsFormatContext->nb_streams);
00182 
00183   // Prior to ffmpeg svn revision 7556, nb_streams was 'int', but then
00184   // changed to 'unsigned int'; to allow either type we will later
00185   // cast it to unsigned int, but first we want to check that it is
00186   // indeed non-negative:
00187   ASSERT(isNonNegative(itsFormatContext->nb_streams));
00188 
00189   for (unsigned int i = 0; i < (unsigned int)(itsFormatContext->nb_streams); ++i)
00190     {
00191 #ifdef INVT_FFMPEG_AVSTREAM_CODEC_IS_POINTER
00192       AVCodecContext *enc = itsFormatContext->streams[i]->codec;
00193 #else
00194       AVCodecContext *enc = itsFormatContext->streams[i]->codec;
00195 #endif
00196 
00197       char buf[512];
00198       avcodec_string(&buf[0], sizeof(buf), enc, /*encode=*/ 0);
00199       buf[sizeof(buf)-1] = '\0';
00200 
00201       LDEBUG("%s: stream %u/%u: codec_string=%s",
00202              fname, i, (unsigned int)(itsFormatContext->nb_streams),
00203              buf);
00204 
00205       if (enc->codec_type == CODEC_TYPE_VIDEO)
00206         {
00207           itsStreamID = i;
00208           itsCodecContext = enc;
00209           break;
00210         }
00211     }
00212 
00213   if (itsStreamID < 0)
00214     LFATAL("%s: no video stream", itsFilename.c_str());
00215 
00216   ASSERT(itsCodecContext != 0);
00217   AVCodec* codec = avcodec_find_decoder(itsCodecContext->codec_id);
00218 
00219   if (codec == 0)
00220     LFATAL("%s: no codec found for codec_id=%d",
00221            fname, int(itsCodecContext->codec_id));
00222 
00223 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4697) // rev 2636
00224   itsCodecContext->debug_mv = 0;
00225 #endif
00226   itsCodecContext->debug = 0;
00227   itsCodecContext->workaround_bugs = 1;
00228 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4722) // rev 3507
00229   itsCodecContext->lowres = 0;
00230   if (itsCodecContext->lowres)
00231     itsCodecContext->flags |= CODEC_FLAG_EMU_EDGE;
00232 #endif
00233   itsCodecContext->idct_algo= FF_IDCT_AUTO;
00234 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 4721) // rev 3429
00235   if (0)
00236     itsCodecContext->flags2 |= CODEC_FLAG2_FAST;
00237 #endif
00238 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4758) // rev 4440
00239   itsCodecContext->skip_frame= AVDISCARD_DEFAULT;
00240   itsCodecContext->skip_idct= AVDISCARD_DEFAULT;
00241   itsCodecContext->skip_loop_filter= AVDISCARD_DEFAULT;
00242 #endif
00243 
00244 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 3410431) // rev 5210
00245   // no more error_resilience parameter?
00246 #elif defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 3276800) // rev 4590
00247   itsCodecContext->error_resilience= FF_ER_CAREFUL;
00248 #else
00249   itsCodecContext->error_resilience= FF_ER_CAREFULL;
00250 #endif
00251   itsCodecContext->error_concealment= 3;
00252   if (!codec || avcodec_open(itsCodecContext, codec) < 0)
00253     LFATAL("avcodec_open() failed");
00254 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4702) // rev 2772
00255   itsCodecContext->thread_count= 1;
00256 #endif
00257 
00258 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00259   avcodec_get_frame_defaults(&itsPicture);
00260 #else
00261   {
00262     AVFrame* tmp = avcodec_alloc_frame();
00263     memcpy(&itsPicture, tmp, sizeof(AVFrame));
00264     av_free(tmp);
00265   }
00266 #endif
00267 
00268   LINFO("libavcodec build %d (%d.%d.%d)",
00269         int(LIBAVCODEC_BUILD),
00270         int((LIBAVCODEC_BUILD & 0xff0000) >> 16),
00271         int((LIBAVCODEC_BUILD & 0xff00) >> 8),
00272         int((LIBAVCODEC_BUILD & 0xff) >> 0));
00273 
00274   LINFO("libavformat build %d (%d.%d.%d)",
00275         int(LIBAVFORMAT_BUILD),
00276         int((LIBAVFORMAT_BUILD & 0xff0000) >> 16),
00277         int((LIBAVFORMAT_BUILD & 0xff00) >> 8),
00278         int((LIBAVFORMAT_BUILD & 0xff) >> 0));
00279 
00280   char buf[512];
00281   avcodec_string(&buf[0], sizeof(buf), itsCodecContext, /*encode=*/ 0);
00282   buf[sizeof(buf)-1] = '\0';
00283   LINFO("%s [%s]", fname, &buf[0]);
00284 
00285   if (preload)
00286     while (getNextPacket() == true)
00287       { /* empty */ }
00288 }
00289 
00290 // ######################################################################
00291 FfmpegPacketDecoder::~FfmpegPacketDecoder()
00292 {
00293   if (itsCodecContext)
00294     {
00295       avcodec_close(itsCodecContext);
00296       itsCodecContext = 0;
00297       itsStreamID = -1;
00298     }
00299 
00300   if (itsFormatContext)
00301     {
00302       av_close_input_file(itsFormatContext);
00303       itsFormatContext = NULL; /* safety */
00304     }
00305 
00306   while (!itsPacketTrashQ.empty())
00307     {
00308       av_free_packet(&(itsPacketTrashQ.back()));
00309       itsPacketTrashQ.pop_back();
00310     }
00311 }
00312 
00313 // ######################################################################
00314 int FfmpegPacketDecoder::apparentFrameNumber() const
00315 {
00316   return
00317     itsNextFramePushback
00318     ? itsFrameNumber - 1
00319     : itsFrameNumber;
00320 }
00321 
00322 // ######################################################################
00323 GenericFrameSpec FfmpegPacketDecoder::peekFrameSpec()
00324 {
00325   if (!itsDimsValid)
00326     {
00327       // if we've already peeked at the next frame, then the dims
00328       // should have already be valid:
00329       ASSERT(!itsNextFramePushback);
00330 
00331       if (readRawFrame() == 0)
00332         {
00333           if (itsCodecContext->width % 16 != 0)
00334             LFATAL("readRawFrame() failed (this may be because the "
00335                    "movie's width is %d, which is not divisible by 16)",
00336                    itsCodecContext->width);
00337           else if (itsCodecContext->height % 16 != 0)
00338             LFATAL("readRawFrame() failed (this may be because the "
00339                    "movie's height is %d, which is not divisible by 16)",
00340                    itsCodecContext->height);
00341           else
00342             LFATAL("readRawFrame() failed");
00343         }
00344 
00345       itsNextFramePushback = true;
00346     }
00347 
00348   ASSERT(itsDimsValid);
00349 
00350   GenericFrameSpec result;
00351 
00352   result.nativeType = GenericFrame::VIDEO;
00353   result.videoFormat =
00354     convertAVPixelFormatToVideoFormat(itsCodecContext->pix_fmt);
00355   result.videoByteSwap = false;
00356   result.dims = Dims(itsCodecContext->width, itsCodecContext->height);
00357   result.floatFlags = 0;
00358 
00359 #if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4754) // SVN rev >= 4168
00360   result.frameRate = static_cast<float>(1/av_q2d(itsCodecContext->time_base)) ;
00361 #else // assume FFmpeg libavcodec build 4753 or earlier (i.e., SVN rev <= 4161)
00362   result.frameRate = itsCodecContext->frame_rate ;
00363 #endif
00364 
00365   return result;
00366 }
00367 
00368 // ######################################################################
00369 VideoFrame FfmpegPacketDecoder::readVideoFrame()
00370 {
00371   // note that we need to force the peekFrameSpec() call to occur
00372   // before the convertAVFrameToVideoFrame() call, so that
00373   // itsCodecContext->{width,height} are properly initialized
00374   const GenericFrameSpec spec = this->peekFrameSpec();
00375 
00376   return convertAVFrameToVideoFrame(this->readRawFrame(),
00377                                     itsCodecContext->pix_fmt,
00378                                     spec.dims);
00379 }
00380 
00381 // ######################################################################
00382 Image<PixRGB<byte> > FfmpegPacketDecoder::readRGB()
00383 {
00384   // note that we need to force the peekFrameSpec() call to occur
00385   // before the convertAVFrameToRGB() call, so that
00386   // itsCodecContext->{width,height} are properly initialized
00387   const GenericFrameSpec spec = this->peekFrameSpec();
00388 
00389   return convertAVFrameToRGB(this->readRawFrame(),
00390                              itsCodecContext->pix_fmt,
00391                              spec.dims);
00392 }
00393 
00394 // ######################################################################
00395 bool FfmpegPacketDecoder::readAndDiscardFrame()
00396 {
00397   return (readRawFrame() != 0);
00398 }
00399 
00400 // ######################################################################
00401 bool FfmpegPacketDecoder::getNextPacket()
00402 {
00403 #if !(defined(LIBAVFORMAT_BUILD) && (LIBAVFORMAT_BUILD >= 4610))
00404   LFATAL("you must have <ffmpeg/avformat.h> with "
00405          "LIBAVFORMAT_BUILD >= 4610 to use FfmpegPacketDecoder");
00406   /* can't happen */ return false;
00407 #else
00408   if (eof_reached(itsFormatContext))
00409     return false;
00410 
00411   while (true)
00412     {
00413       AVPacket pkt;
00414       av_init_packet(&pkt);
00415       const int ret = av_read_frame(itsFormatContext, &pkt);
00416       if (ret < 0)
00417         {
00418           return false;
00419         }
00420       else
00421         {
00422           LDEBUG("eof_reached=%d pkt = {data=%p size=%d "
00423                  "stream_index=%d flags=%d duration=%d}",
00424                  int(eof_reached(itsFormatContext)),
00425                  pkt.data, pkt.size, pkt.stream_index,
00426                  pkt.flags, pkt.duration);
00427 
00428           if (pkt.stream_index == itsStreamID &&
00429               !eof_reached(itsFormatContext))
00430             {
00431               av_dup_packet(&pkt);
00432               itsPacketQ.push_back(pkt);
00433               return true;
00434             }
00435           else
00436             {
00437               av_free_packet(&pkt);
00438               // no return here; let the loop go around again and try
00439               // to get another packet
00440             }
00441         }
00442     }
00443 #endif
00444 }
00445 
00446 // ######################################################################
00447 AVFrame* FfmpegPacketDecoder::readRawFrame()
00448 {
00449   if (itsNextFramePushback)
00450     {
00451       itsNextFramePushback = false;
00452       return &itsPicture;
00453     }
00454 
00455   if (itsPacketsExhausted)
00456     return 0;
00457 
00458   while (!itsPacketTrashQ.empty())
00459     {
00460       av_free_packet(&(itsPacketTrashQ.back()));
00461       itsPacketTrashQ.pop_back();
00462     }
00463 
00464   while (true)
00465     {
00466       this->getNextPacket();
00467 
00468       if (itsPacketQ.size() == 0)
00469         {
00470           // ok, we are out of packets, but try to read one more frame
00471           // (mpeg-1 streams have a latency of one frame)
00472           int got_picture;
00473           const int len =
00474             avcodec_decode_video(itsCodecContext,
00475                                  &itsPicture, &got_picture,
00476                                  NULL, 0);
00477 
00478           (void) len;
00479 
00480           itsPacketsExhausted = true;
00481 
00482           if (got_picture)
00483             {
00484               ++itsFrameNumber;
00485               itsDimsValid = true;
00486               return &itsPicture;
00487             }
00488 
00489           return 0;
00490         }
00491 
00492       // ok, if we got here then our packet queue is non-empty, so
00493       // let's pull out the next packet and handle it:
00494 
00495       ASSERT(itsPacketQ.size() > 0);
00496       AVPacket pkt = itsPacketQ.front();
00497       itsPacketQ.pop_front();
00498 
00499       ASSERT(pkt.stream_index == itsStreamID);
00500 
00501       int got_picture;
00502       const int len =
00503         avcodec_decode_video(itsCodecContext,
00504                              &itsPicture, &got_picture,
00505                              pkt.data, pkt.size);
00506 
00507       // we can't call av_free_packet(&pkt) just now because
00508       // itsPicture may refer to pkt.data internally, so if we freed
00509       // the packet we could have dangling pointers in itsPicture;
00510       // instead, we just note that the packet must be freed later and
00511       // then we free it when we start to read the next frame
00512       itsPacketTrashQ.push_back(pkt);
00513 
00514       if (len < 0)
00515         LFATAL("avcodec_decode_video() failed");
00516 
00517       if (got_picture)
00518         {
00519           ++itsFrameNumber;
00520           itsDimsValid = true;
00521           return &itsPicture;
00522         }
00523     }
00524 }
00525 
00526 #endif // INVT_HAVE_AVCODEC
00527 
00528 // ######################################################################
00529 /* So things look consistent in everyone's emacs... */
00530 /* Local Variables: */
00531 /* mode: c++ */
00532 /* indent-tabs-mode: nil */
00533 /* End: */
00534 
00535 #endif // MEDIA_FFMPEGPACKETDECODER_C_DEFINED