/*!@file Media/FfmpegDecoder.C Low-level class for using ffmpeg to decode movie files */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Media/FfmpegDecoder.C $
// $Id: FfmpegDecoder.C 12611 2010-01-20 22:49:23Z beobot $
//

#ifndef MEDIA_FFMPEGDECODER_C_DEFINED
#define MEDIA_FFMPEGDECODER_C_DEFINED

#ifdef INVT_HAVE_AVCODEC

#include "Media/FfmpegDecoder.H"

#include "Image/Image.H"
#include "Image/Pixels.H"
#include "Raster/GenericFrame.H"
#include "Util/Assert.H"
#include "Util/log.H"
#include "Video/FfmpegFrame.H"
#include "Video/VideoFrame.H"
#include "rutz/trace.h"

#include <algorithm> // for std::max()
#include <cstdio>    // for fopen(), fread(), ferror(), fclose(), fileno()
#include <cstdlib>
#include <cstring>   // for strcmp(), memcpy(), memmove()
#define attribute_deprecated

extern "C"
{
//These directives are necessary to handle the various places in which
//different versions of ffmpeg install their files. Unfortunately,
//it looks like the cdeps program can't handle the more elegant
//#if defined(XXX) #elif defined(XXX) #endif, so we have to do the following:

#ifdef HAVE_LIBAVCODEC_AVCODEC_H
#include <libavcodec/avcodec.h>
#else
#ifdef HAVE_FFMPEG_AVCODEC_H
#include <ffmpeg/avcodec.h>
#endif
#endif

#ifdef HAVE_LIBAVFORMAT_AVFORMAT_H
#include <libavformat/avformat.h>
#else
#ifdef HAVE_FFMPEG_AVFORMAT_H
#include <ffmpeg/avformat.h>
#endif
#endif
}

#include <sys/stat.h>
#include <sys/types.h>
#include <termios.h>

namespace
{
  // Overload pair that lets the same ASSERT work whether nb_streams is
  // declared as 'int' or as 'unsigned int' in the installed ffmpeg.
  bool isNonNegative(int v) { return v >= 0; }
  bool isNonNegative(unsigned int /* v */) { return true; }

  // Return a copy of src with its rows in reverse order (row 0 of the
  // source becomes the last row of the result).
  template <class T>
  Image<T> vFlip(const Image<T>& src)
  {
    GVX_TRACE(__PRETTY_FUNCTION__);

    Image<T> result(src.getDims(), NO_INIT);

    const int w = src.getWidth();
    const int h = src.getHeight();

    const T* sptr = src.getArrayPtr();
    T* dptr = result.getArrayPtr() + (h-1)*w;

    for (int y = 0; y < h; ++y)
      {
        safecopy(dptr, sptr, w);
        sptr += w;
        dptr -= w;
      }

    return result;
  }

  // Open fname with libavformat (forcing iformat if it is non-null),
  // look for the first video stream, and return the decoder that
  // libavcodec reports for that stream's codec id. The input is closed
  // again before returning. Any failure (cannot open, cannot probe, no
  // video stream, no decoder) is reported through LFATAL.
  AVCodec* findVideoCodec(const char* fname, AVInputFormat* iformat)
  {
    AVFormatContext* ic = NULL;
    int err = av_open_input_file(&ic, fname, iformat, 0, NULL);
    if (err < 0)
      LFATAL("Error opening input file %s: %d", fname, err);

    err = av_find_stream_info(ic);
    if (err < 0)
      {
        // release the opened input before reporting the error, so
        // that it is not left open if LFATAL unwinds out of here
        av_close_input_file(ic);
        LFATAL("Cannot determine stream codec parameters: %d", err);
      }

    LINFO("Using input format '%s' (%s)",
          ic->iformat->name, ic->iformat->long_name);

    AVCodec* result = 0;
    bool foundVideoStream = false;

    // Prior to ffmpeg svn revision 7556, nb_streams was 'int', but then
    // changed to 'unsigned int'; to allow either type we will later
    // cast it to unsigned int, but first we want to check that it is
    // indeed non-negative:
    ASSERT(isNonNegative(ic->nb_streams));

    for (unsigned int i = 0; i < (unsigned int)(ic->nb_streams); ++i)
      {
        // AVStream::codec is a pointer in some ffmpeg versions and an
        // embedded struct in others; in the latter case we need to
        // take its address
#ifdef INVT_FFMPEG_AVSTREAM_CODEC_IS_POINTER
        const AVCodecContext* const cc = ic->streams[i]->codec;
#else
        const AVCodecContext* const cc = &ic->streams[i]->codec;
#endif
        if (cc->codec_type == CODEC_TYPE_VIDEO)
          {
            foundVideoStream = true;
            result = avcodec_find_decoder(cc->codec_id);
            break;
          }
      }

    // we are done with the container; close it before reporting any
    // error below
    av_close_input_file(ic);

    if (foundVideoStream && result == 0)
      LFATAL("codec not found");

    if (result == 0)
      LFATAL("Could not find a video stream in input file %s", fname);

    return result;
  }
}

// ######################################################################
// Set up a decoder for the movie file fname.
//
// codecname: "List" logs the available input formats and then fails
//            with LFATAL; "Auto" lets libavformat probe the format;
//            any other value is looked up with av_find_input_format().
// bufflen:   size in bytes of the read buffer when not preloading.
// fname:     path of the movie file.
// preload:   if true, the read buffer is sized from the file's size,
//            the file is read into it here, and the file is closed.
FfmpegDecoder::FfmpegDecoder(const char* codecname,
                             const int bufflen,
                             const char* fname,
                             const bool preload)
  :
  itsFile(NULL),
  itsContext(),
  itsPicture(),
  itsFrameNumber(0),
  itsBuf(),
  itsIdxStart(0),
  itsIdxEnd(0),
  itsInputEOF(false),
  itsDimsValid(false),
  itsNextFramePushback(false)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  // no need to guard these functions for being called multiple times;
  // they all have internal guards
  av_register_all();
  avcodec_init();
  avcodec_register_all();

  AVInputFormat* iformat = NULL;
  if (strcmp(codecname, "List") == 0)
    {
      // list available codecs
      LINFO("##### Available input codecs (not all may work for video):");
      for(AVInputFormat* f = first_iformat; f != NULL; f = f->next)
        LINFO("%s: %s %d", f->name, f->long_name, f->flags);
      LFATAL("Please select a codec from this list");
    }
  else if (strcmp(codecname, "Auto") != 0)
    {
      // format is given
      iformat = av_find_input_format(codecname);
    }

  // ok, let's find a video stream:
  AVCodec* const codec = findVideoCodec(fname, iformat);

  ASSERT(codec != 0);

  // initialize itsContext with libavcodec's default settings
#if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
  avcodec_get_context_defaults(&itsContext);
#else
  {
    AVCodecContext* const tmp = avcodec_alloc_context();
    if (tmp == 0)
      LFATAL("could not allocate codec context");
    memcpy(&itsContext, tmp, sizeof(AVCodecContext));
    // memory handed out by libavcodec must go back through av_free(),
    // not plain free()
    av_free(tmp);
  }
#endif

  // initialize itsPicture with libavcodec's default settings
#if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
  avcodec_get_frame_defaults(&itsPicture);
#else
  {
    AVFrame* const tmp = avcodec_alloc_frame();
    if (tmp == 0)
      LFATAL("could not allocate frame");
    memcpy(&itsPicture, tmp, sizeof(AVFrame));
    av_free(tmp);
  }
#endif

  if (codec->capabilities & CODEC_CAP_TRUNCATED)
    itsContext.flags |= CODEC_FLAG_TRUNCATED;

  if (avcodec_open(&itsContext, codec) < 0)
    LFATAL("could not open codec\n");

  // open the stream:
  if (itsFile) fclose(itsFile);
  itsFile = fopen(fname, "rb");
  if (itsFile == NULL)
    LFATAL("could not open file! %s", fname);

  // get a read buffer:
  size_t blen;
  if (preload) // allocate a buffer for the entire movie
    {
      struct stat st;
      const int fd = fileno(itsFile);
      if (fd == -1) PLFATAL("Problem with fileno()");
      if (fstat(fd, &st) == -1) PLFATAL("Cannot stat %s", fname);
      // keep the full width of the file size; narrowing to int would
      // break for movies of 2GB or more
      blen = size_t(st.st_size);
    }
  else
    blen = size_t(bufflen); // allocate a buffer for a chunk of movie

  itsBuf.resize(blen);
  itsIdxStart = 0;
  itsIdxEnd = 0;
  itsFrameNumber = 0;
  itsInputEOF = false;
  itsDimsValid = false;

  LINFO("libavcodec build %d (%d.%d.%d)",
        int(LIBAVCODEC_BUILD),
        int((LIBAVCODEC_BUILD & 0xff0000) >> 16),
        int((LIBAVCODEC_BUILD & 0xff00) >> 8),
        int((LIBAVCODEC_BUILD & 0xff) >> 0));

  LINFO("libavformat build %d (%d.%d.%d)",
        int(LIBAVFORMAT_BUILD),
        int((LIBAVFORMAT_BUILD & 0xff0000) >> 16),
        int((LIBAVFORMAT_BUILD & 0xff00) >> 8),
        int((LIBAVFORMAT_BUILD & 0xff) >> 0));

  char buf[512];
  avcodec_string(&buf[0], sizeof(buf), &itsContext, /*encode=*/ 0);
  buf[sizeof(buf)-1] = '\0';
  LINFO("%s [%s]", fname, &buf[0]);

  // if preload, let's load up the entire movie now and close itsFile:
  if (preload)
    {
      // fread() returns an unsigned count, so keep it as size_t; do
      // not touch itsBuf[0] if the buffer turned out to be empty
      const size_t nread =
        (itsBuf.size() > 0)
        ? fread(&itsBuf[0], 1, itsBuf.size(), itsFile)
        : 0;
      if (nread == 0) PLFATAL("Read error");
      itsIdxEnd = nread;
      // close the stream since we have all the data already:
      fclose(itsFile);
      itsFile = NULL;
      LINFO("pre-loaded %s", fname);
    }
}

// ######################################################################
// Close the input file (if it is still open) and the codec context.
FfmpegDecoder::~FfmpegDecoder()
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  if (itsFile) { fclose(itsFile); }
  avcodec_close(&itsContext);
}

// ######################################################################
// Frame counter as seen by the caller: a frame that has been decoded
// by a peek but is still pushed back does not count yet.
int FfmpegDecoder::apparentFrameNumber() const
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  return
    itsNextFramePushback
    ? itsFrameNumber - 1
    : itsFrameNumber;
}

// ######################################################################
// Describe the frames of this movie (format, dimensions, frame rate).
// If no frame has been decoded yet, one frame is decoded here and
// pushed back so that the next readRawFrame() call returns it.
GenericFrameSpec FfmpegDecoder::peekFrameSpec()
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  if (!itsDimsValid)
    {
      // if we've already peeked at the next frame, then the dims
      // should have already been valid:
      ASSERT(!itsNextFramePushback);

      readRawFrame();
      itsNextFramePushback = true;
    }

  ASSERT(itsDimsValid);

  GenericFrameSpec result;

  result.nativeType = GenericFrame::VIDEO;
  result.videoFormat =
    convertAVPixelFormatToVideoFormat(itsContext.pix_fmt);
  result.videoByteSwap = false;
  result.dims = Dims(itsContext.width, itsContext.height);
  result.floatFlags = 0;

#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4754) // SVN rev >= 4168
  result.frameRate = static_cast<float>(1/av_q2d(itsContext.time_base)) ;
#else // assume FFmpeg libavcodec build 4753 or earlier (i.e., SVN rev <= 4161)
  result.frameRate = itsContext.frame_rate ;
#endif

  return result;
}

// ######################################################################
// Decode the next frame and convert it to a VideoFrame.
VideoFrame FfmpegDecoder::readVideoFrame()
{
  // note that we need to force the peekFrameSpec() call to occur
  // before the convertAVFrameToVideoFrame() call, so that
  // itsContext.{width,height} are properly initialized
  const GenericFrameSpec spec = this->peekFrameSpec();

  return convertAVFrameToVideoFrame(this->readRawFrame(),
                                    itsContext.pix_fmt,
                                    spec.dims);
}

// ######################################################################
// Decode the next frame and convert it to an RGB image.
Image<PixRGB<byte> > FfmpegDecoder::readRGB()
{
  // note that we need to force the peekFrameSpec() call to occur
  // before the convertAVFrameToRGB() call, so that
  // itsContext.{width,height} are properly initialized
  const GenericFrameSpec spec = this->peekFrameSpec();

  return convertAVFrameToRGB(this->readRawFrame(),
                             itsContext.pix_fmt,
                             spec.dims);
}

// ######################################################################
// Decode the next frame without converting it; returns false if
// readRawFrame() had no frame to give.
bool FfmpegDecoder::readAndDiscardFrame()
{
  return (readRawFrame() != 0);
}

// ######################################################################
// Return the next decoded frame (a pointer to itsPicture), or NULL once
// the input is exhausted. A frame pushed back by peekFrameSpec() is
// returned first, without decoding anything new.
const AVFrame* FfmpegDecoder::readRawFrame()
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  if (itsNextFramePushback)
    {
      itsNextFramePushback = false;
      return &itsPicture;
    }

  if (itsInputEOF) return NULL; // we have reached end of file already

  // number of consecutive decode calls that consumed zero bytes
  int nlen0 = 0;

  while (true)
    {
      bool goteof = false;

      ASSERT(itsIdxEnd >= itsIdxStart);

      // do we need to read more data from file?
      if (itsIdxEnd < itsIdxStart + 16384)
        {
          const size_t size = this->refillBuffer();
          if (size == 0) goteof = true; // end of file
        }

      // (spaces around ZU keep it from being parsed as a literal
      // suffix by C++11 and later compilers)
      LDEBUG("buffer range = %" ZU " - %" ZU " of %" ZU ", goteof=%d",
             itsIdxStart, itsIdxEnd, itsBuf.size(), int(goteof));

      // decode contents of our read buffer if any:
      int gotpic = 0;
      const int len =
        avcodec_decode_video(&itsContext, &itsPicture, &gotpic,
                             &itsBuf[itsIdxStart],
                             itsIdxEnd - itsIdxStart);

      if (len == 0) ++nlen0;
      else nlen0 = 0;

      LDEBUG("end-start=%" ZU ", len=%d, nlen0=%d, gotpic=%d",
             itsIdxEnd-itsIdxStart, len, nlen0, gotpic);

      if (len < 0)
        LFATAL("Error while decoding frame %d", itsFrameNumber);
      else if (size_t(len) > (itsIdxEnd - itsIdxStart))
        {
          const size_t minsize =
            std::max(4*size_t(len)+4096,
                     4*(itsIdxEnd-itsIdxStart)+4096);

          if (minsize > itsBuf.size())
            // ok, libavcodec needs a bigger buffer in order to be
            // able to hold a full frame, so let's do that now:
            itsBuf.resize(minsize);

          // ok, the decoder wants more data...
          const size_t size = this->refillBuffer();
          if (size == 0)
            LFATAL("libavcodec wanted more data, but we are at eof");
        }
      else
        {
          itsIdxStart += len;
          if ((itsIdxStart == itsIdxEnd) && goteof && (gotpic || nlen0 >= 2))
            itsInputEOF = true; // decoded last frame
          if ((itsIdxStart == itsIdxEnd) && goteof && (nlen0 >= 2))
            return NULL;
          if (gotpic)
            {
              ++itsFrameNumber;
              itsDimsValid = true;
              return &itsPicture;
            }
        }
    }
}

// ######################################################################
// Compact the unread bytes to the front of itsBuf and append as much
// new data from itsFile as fits. Returns the number of bytes newly
// read; 0 means end of file (or that the file is already closed).
size_t FfmpegDecoder::refillBuffer()
{
  // let's move the data we have up to the front of the buffer, and
  // then fill the buffer again
  const size_t nsave = itsIdxEnd - itsIdxStart;
  ASSERT(itsBuf.size() >= nsave);

  if (nsave > 0)
    memmove(&itsBuf[0], &itsBuf[itsIdxStart], nsave);
  itsIdxStart = 0;
  itsIdxEnd = nsave;

  // if our file is already closed (e.g. if we preloaded the entire
  // movie), then we can't read any more data, so just return 0; this
  // check comes before the room check below because a small preloaded
  // movie legitimately fills the whole buffer:
  if (itsFile == 0)
    return 0;

  // with an open file we need room for at least one more byte,
  // otherwise a zero-length read would be mistaken for end of file
  ASSERT(itsBuf.size() > nsave);

  // fread() never returns a negative value; a failed read shows up as
  // a short count plus the stream's error indicator
  const size_t nread = fread(&itsBuf[0] + nsave, 1,
                             itsBuf.size() - nsave, itsFile);
  if (nread == 0 && ferror(itsFile))
    PLFATAL("read error");
  itsIdxEnd += nread;
  return nread;
}

#endif // INVT_HAVE_AVCODEC

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* mode: c++ */
/* indent-tabs-mode: nil */
/* End: */

#endif // MEDIA_FFMPEGDECODER_C_DEFINED