FfmpegEncoder.C

Go to the documentation of this file.
00001 /*!@file Media/FfmpegEncoder.C Low-level class for using ffmpeg to encode movie files */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Media/FfmpegEncoder.C $
00035 // $Id: FfmpegEncoder.C 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 
00039 #include "Media/FfmpegEncoder.H"
00040 
00041 #include "Image/Image.H"
00042 #include "Image/Pixels.H"
00043 #include "Image/color_conversions.H" // for rgb24_to_yv12_c()
00044 #include "Raster/GenericFrame.H"
00045 #include "Util/log.H"
00046 #include "Video/FfmpegFrame.H"
00047 #include "Video/VideoFrame.H"
00048 #include "rutz/arrays.h"
00049 #include "rutz/trace.h"
00050 
00051 #ifdef INVT_HAVE_AVCODEC
00052 
00053 // ######################################################################
00054 FfmpegEncoder::FfmpegEncoder(const std::string& fname,
00055                              const std::string& codecname,
00056                              const int bitrate,
00057                              const int framerate,
00058                              const int frameratebase,
00059                              const Dims& dims,
00060                              const int bufsz,
00061                              const bool useFormatContext)
00062   :
00063   itsFile(0),
00064   itsContext(),
00065         itsFormatContext(0),
00066   itsFrameNumber(0),
00067   itsOutbufSize(bufsz),
00068   itsFrameSizeRange(),
00069         itsUseFormatContext(useFormatContext)
00070 {
00071   GVX_TRACE(__PRETTY_FUNCTION__);
00072 
00073   // no need to guard these functions for being called multiple times;
00074   // they all have internal guards
00075   av_register_all();
00076   avcodec_init();
00077   avcodec_register_all();
00078 
00079   AVOutputFormat* oformat = NULL;
00080   if (codecname.compare("List") == 0) { // list available codecs
00081     LINFO("##### Available output codecs (not all may work for video):");
00082     for(AVOutputFormat* f = first_oformat; f != NULL; f = f->next)
00083       LINFO("%s: %s %d", f->name, f->long_name, f->flags);
00084     LFATAL("Please select a codec from this list");
00085   } else { // format is given
00086     // no av_find_output_format()?? let's do it by hand...
00087     for(AVOutputFormat* f = first_oformat; f != NULL; f = f->next)
00088       if (codecname.compare(f->name) == 0)
00089         { oformat = f; break; }
00090   }
00091 
00092   if (oformat == 0)
00093     LFATAL("No such video codec '%s';\n"
00094            "try re-running with --output-codec=List to see a list\n"
00095            "of available codecs", codecname.c_str());
00096 
00097   char ext[100]; ext[0] = '.'; uint i;
00098   for (i = 0; i < strlen(oformat->extensions); i ++)
00099     if (oformat->extensions[i] == ',') break;
00100     else ext[i+1] = oformat->extensions[i];
00101   ext[i+1] = '\0';
00102   LINFO("Using output format '%s' (%s), extension %s", oformat->name,
00103         oformat->long_name, ext);
00104 
00105   std::string oname(fname);
00106   std::string::size_type idx1 = oname.rfind('/', oname.npos);
00107   std::string::size_type idx2 = oname.rfind('.', oname.npos);
00108   // must check that idx2 is valid; otherwise if we do
00109   // oname.erase(idx2) with e.g. idx2==npos then we will get a
00110   // std::out_of_range exception
00111   if (idx2 < oname.size() && idx2 > idx1)
00112     oname.erase(idx2, oname.npos);
00113   oname.append(ext);
00114   LINFO("Output file: %s", oname.c_str());
00115 
00116         if (itsUseFormatContext)
00117         {
00118 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS
00119                 LINFO("Using FormatContext to output data");
00120                 itsFormatContext = av_alloc_format_context();
00121                 if (!itsFormatContext)
00122                         LFATAL("Cannot allocate format context");
00123                 itsFormatContext->oformat = oformat;
00124 
00125                 itsAVStream = av_new_stream(itsFormatContext, 0);
00126                 if (!itsAVStream)
00127                         LFATAL("Can not allocate AVStream");
00128 #else
00129     LFATAL("Need a new version of ffmpeg libs for this option");
00130     itsFormatContext = NULL;
00131 #endif
00132         }
00133 
00134   AVCodec* const codec = avcodec_find_encoder(oformat->video_codec);
00135   if (codec == NULL)  LFATAL("codec not found");
00136 
00137 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00138   avcodec_get_context_defaults(&itsContext);
00139 #else
00140   {
00141     AVCodecContext* const tmp = avcodec_alloc_context();
00142     memcpy(&itsContext, tmp, sizeof(AVCodecContext));
00143     free(tmp);
00144   }
00145 #endif
00146 
00147   itsContext.bit_rate = bitrate;
00148 
00149   // Be sure to set itsContext.pix_fmt -- it may occasionally
00150   // appear to work to leave pix_fmt unset, because the value we want,
00151   // PIX_FMT_YUV420P, has the enum value of 0, so if the uninitialized
00152   // memory for pix_fmt happens to have the value 0, then we'll slip
00153   // through without setting it explicitly.
00154   itsContext.pix_fmt = PIX_FMT_YUV420P;
00155 
00156   /* resolution must be a multiple of two */
00157   itsContext.width = dims.w();
00158   itsContext.height = dims.h();
00159 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE)
00160   AVRational time_base = { frameratebase, framerate };
00161   itsContext.time_base = time_base;
00162   const int frb = frameratebase;
00163 #elif LIBAVCODEC_VERSION_INT >= 0x000406 && LIBAVCODEC_BUILD > 4665
00164   itsContext.frame_rate = framerate;
00165   const int frb = frameratebase;
00166   itsContext.frame_rate_base = frb;
00167 #else
00168   itsContext.frame_rate = framerate;
00169   const int frb = FRAME_RATE_BASE;
00170 #endif
00171   itsContext.gop_size = 10; /* emit one intra frame every ten frames */
00172 
00173   if(codec->id != CODEC_ID_MPEG4 &&
00174      codec->id != CODEC_ID_MPEG1VIDEO &&
00175      codec->id != CODEC_ID_MPEG2VIDEO)
00176     itsContext.max_b_frames = 0;
00177   else
00178     itsContext.max_b_frames = 1;
00179 
00180   itsFrameNumber = 0;
00181 
00182   LINFO("using max_b_frames=%i bitrate=%u width=%u height=%u framerate=%u frameratebase=%u",
00183         itsContext.max_b_frames, itsContext.bit_rate, itsContext.width, itsContext.height, framerate, frb);
00184 
00185   if (avcodec_open(&itsContext, codec) < 0)
00186     LFATAL("could not open codec\n");
00187 
00188         if (itsUseFormatContext)
00189         {
00190 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS
00191                 AVCodecContext *c = itsAVStream->codec;
00192                 c->codec_id = itsContext.codec_id;
00193                 c->codec_type = CODEC_TYPE_VIDEO;
00194 
00195                 /* put sample parameters */
00196                 c->bit_rate = itsContext.bit_rate;
00197                 /* resolution must be a multiple of two */
00198                 c->width = itsContext.width;
00199                 c->height = itsContext.height;
00200                 /* time base: this is the fundamental unit of time (in seconds) in terms
00201                          of which frame timestamps are represented. for fixed-fps content,
00202                          timebase should be 1/framerate and timestamp increments should be
00203                          identically 1. */
00204 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE)
00205                 c->time_base.den = itsContext.time_base.den;
00206                 c->time_base.num = itsContext.time_base.num;
00207 #endif
00208                 c->gop_size = 12; /* emit one intra frame every twelve frames at most */
00209                 c->pix_fmt = itsContext.pix_fmt;
00210 
00211                 /* set the output parameters (must be done even if no
00212                          parameters). */
00213                 if (av_set_parameters(itsFormatContext, NULL) < 0)
00214                         LFATAL("Invalid output format parameters");
00215 
00216 #if defined(INVT_FFMPEG_URL_OPEN_FUNC_TAKES_SINGLE_POINTER)
00217 
00218 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER)
00219                 if (url_fopen(itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0)
00220                         LFATAL("Could not open '%s'", oname.c_str());
00221 #else
00222                 if (url_fopen(&itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0)
00223                         LFATAL("Could not open '%s'", oname.c_str());
00224 #endif
00225 
00226 #else
00227 
00228 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER)
00229                 if (url_fopen(&itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0)
00230                         LFATAL("Could not open '%s'", oname.c_str());
00231 #else
00232                         LFATAL("Could not open '%s' ffmpeg version mismatch", oname.c_str());
00233 #endif
00234 
00235 #endif //INVT_FFMPEG_URL_OPEN_FUNC_TAKES_SINGLE_POINTER)
00236 
00237 
00238 
00239                 /* write the stream header, if any */
00240                 av_write_header(itsFormatContext);
00241 #else
00242     LFATAL("Need a new version of FFMPEG for this option");
00243 #endif
00244         } else {
00245                 itsFile = fopen(oname.c_str(), "w");
00246                 if (itsFile==NULL)
00247                   LFATAL("could not open file! %s", oname.c_str());
00248         }
00249 
00250   LINFO("EnCoder Inited");
00251 }
00252 
00253 FfmpegEncoder::~FfmpegEncoder()
00254 {
00255   close();
00256 }
00257 
00258 int FfmpegEncoder::close()
00259 {
00260 GVX_TRACE(__PRETTY_FUNCTION__);
00261 
00262   if (itsUseFormatContext)
00263         {
00264                 //if we went through this function already, then all the memory is freed
00265                 if (itsFormatContext  == NULL)
00266                         return 0;
00267         } else {
00268                 if (itsFile == NULL)
00269                   return 0;
00270         }
00271 
00272 
00273   // (1) write any "delayed frames"
00274 {
00275         byte* const outbuf = (byte*) calloc(itsOutbufSize, 1);
00276 
00277         if (outbuf != 0)
00278         {
00279                 while (true)
00280                 {
00281                         LINFO("pre  frame number %d", itsContext.frame_number);
00282 
00283                         const int out_size =
00284                                 avcodec_encode_video(&itsContext, outbuf,
00285                                                 itsOutbufSize, NULL);
00286 
00287                         if (out_size <= 0)
00288                                 break;
00289 
00290                         itsFrameSizeRange.merge(out_size);
00291 
00292                         if (itsUseFormatContext)
00293                         {
00294 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS
00295                                 if (out_size > 0)
00296                                 {
00297                                         AVPacket pkt;
00298                                         av_init_packet(&pkt);
00299 
00300 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE)
00301                                         pkt.pts= av_rescale_q(itsContext.coded_frame->pts,
00302                                                         itsContext.time_base, itsAVStream->time_base);
00303 #endif
00304                                         if(itsContext.coded_frame->key_frame)
00305                                                 pkt.flags |= PKT_FLAG_KEY;
00306                                         pkt.stream_index= itsAVStream->index;
00307                                         pkt.data= outbuf;
00308                                         pkt.size= out_size;
00309 
00310                                         /* write the compressed frame in the media file */
00311                                         av_write_frame(itsFormatContext, &pkt);
00312                                 }
00313 #else
00314         LFATAL("Need a new version of ffmpeg for this option");
00315 #endif
00316                         } else {
00317                                 fwrite(outbuf, 1, out_size, itsFile);
00318                         }
00319 
00320                         LINFO("post frame number %d", itsContext.frame_number);
00321                         LINFO("delayed frame (out_size=%d)", out_size);
00322                 }
00323 
00324                 free(outbuf);
00325         }
00326 }
00327 
00328 if (!itsUseFormatContext)
00329 {
00330         //(2) add sequence end code
00331         {
00332                 char outbuf[8];
00333                 outbuf[0] = 0x00;
00334                 outbuf[1] = 0x00;
00335                 outbuf[2] = 0x01;
00336                 outbuf[3] = 0xb7;
00337                 fwrite(outbuf, 1, 4, itsFile);
00338         }
00339 }
00340 
00341 
00342 
00343   LINFO("end encoder: wrote %d frames, itsFrameSizeRange=[%d..%d]",
00344         itsFrameNumber, itsFrameSizeRange.min(), itsFrameSizeRange.max());
00345 
00346   if (itsUseFormatContext)
00347   {
00348 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS
00349                 avcodec_close(&itsContext);
00350 
00351                 av_write_trailer(itsFormatContext);
00352 
00353                 /* free the streams */
00354                 for(uint i = 0; i < (uint)itsFormatContext->nb_streams; i++) {
00355                         av_freep(&itsFormatContext->streams[i]->codec);
00356                         av_freep(&itsFormatContext->streams[i]);
00357                 }
00358 
00359 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER)
00360                 url_fclose(itsFormatContext->pb);
00361 #else
00362                 url_fclose(&itsFormatContext->pb);
00363 #endif
00364 
00365                 av_free(itsFormatContext);
00366                 itsFormatContext = NULL;
00367 #else
00368     LFATAL("Need a new version of ffmpeg for this option");
00369 #endif
00370         } else {
00371                 fclose(itsFile);
00372                 itsFile = NULL;
00373         }
00374 
00375   return 0;
00376 }
00377 
00378 void FfmpegEncoder::writeRawFrame(const AVFrame* picture)
00379 {
00380 GVX_TRACE(__PRETTY_FUNCTION__);
00381 
00382   // FIXME We'd like to have a way to either (1) compute what the
00383   // maximum necessary itsOutbufSize would be for our given
00384   // framerate+bitrate, or (2) get a chance to retry writing a given
00385   // frame if it is truncated. However, we have no programmatic way of
00386   // knowing whether a given frame gets truncated (all we see is that
00387   // ffmpeg prints "encoded frame too large" on stderr), but even then
00388   // the return value from avcodec_encode_video() is less than our
00389   // itsOutbufSize (although for a "too large" frame we can see
00390   // that the return value is clearly higher than usual). Also, it's
00391   // hard to determine a hard upper limit on the bufsize, even given
00392   // the framerate and bitrate, because the bitrate is only achieved
00393   // on /average/ -- so, any particular frame might be much larger
00394   // (e.g., 10x or 100x) than the average frame size. So, given all
00395   // that, our current approach is just to leave the buffer size up to
00396   // the user via the --output-mpeg-bufsize command-line option.
00397 
00398   // NOTE: it might seem extravagent to allocate+deallocate these
00399   // buffers (outbuf, and picture_buf in writeRGB()) for every single
00400   // frame that is written; however, profiling shows that this
00401   // accounts for only about 2% of the total time spent in
00402   // writeFrame(). The alternatives, both with their own
00403   // disadvantages, would be (1) have separate buffers allocated once
00404   // per object; however this would be expensive in overall memory
00405   // usage if we had multiple mpeg streams open at once; or (2) have
00406   // static buffers shared by all objects; however, this would require
00407   // some form of between-object synchronization in the case of
00408   // multi-threading which could be cpu-expensive both for the
00409   // locking+unlocking and would also waste time waiting to acquire
00410   // the lock for access to the shared buffers.
00411 
00412   rutz::fixed_block<byte> outbuf(itsOutbufSize);
00413 
00414   const int out_size = avcodec_encode_video(&itsContext,
00415                                             &outbuf[0],
00416                                             outbuf.size(),
00417                                             picture);
00418 
00419   if (out_size < 0)
00420     LFATAL("error during avcodec_encode_video()");
00421 
00422         if (out_size > 0)
00423         {
00424                 itsFrameSizeRange.merge(out_size);
00425 
00426                 if (itsUseFormatContext)
00427                 {
00428 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS
00429                         AVPacket pkt;
00430                         av_init_packet(&pkt);
00431 
00432                         pkt.pts= av_rescale_q(itsContext.coded_frame->pts,
00433                                         itsContext.time_base, itsAVStream->time_base);
00434                         if(itsContext.coded_frame->key_frame)
00435                                 pkt.flags |= PKT_FLAG_KEY;
00436                         pkt.stream_index= itsAVStream->index;
00437                         pkt.data= &outbuf[0];
00438                         pkt.size= out_size;
00439 
00440                         /* write the compressed frame in the media file */
00441                         av_write_frame(itsFormatContext, &pkt);
00442 #else
00443       LFATAL("New a new version of ffmpeg for this option");
00444 #endif
00445                 } else {
00446                         fwrite(&outbuf[0], 1, out_size, itsFile);
00447                 }
00448         }
00449 
00450   LDEBUG("itsOutbufSize=%d, out_size=%d, frameSizeRange=[%d..%d]",
00451          itsOutbufSize, out_size,
00452          itsFrameSizeRange.min(), itsFrameSizeRange.max());
00453 
00454   LDEBUG("encoded frame [zero-based] %d (%d delayed frames pending)",
00455         itsFrameNumber,
00456         // to compute the number of pending "delayed frames", we
00457         // subtract the AVCodecContext's frame number from our own,
00458         // except that there is an offset of 2 -- one because
00459         // AVCodecContext counts from 1, while we count from zero, and
00460         // another because AVCodecContext's counter reports the number
00461         // of the NEXT frame to be written, while itsFrameNumber is
00462         // the number of the frame that has just been written
00463         itsFrameNumber - (itsContext.frame_number - 2));
00464 
00465   ++itsFrameNumber;
00466 }
00467 
00468 void FfmpegEncoder::writeRGB(const Image<PixRGB<byte> >& img)
00469 {
00470 GVX_TRACE(__PRETTY_FUNCTION__);
00471 
00472   ASSERT(PIX_FMT_YUV420P == itsContext.pix_fmt);
00473 
00474   const int size = itsContext.width * itsContext.height;
00475   const int size4 =
00476     ((itsContext.width+1)/2) * (itsContext.height/2);
00477 
00478   rutz::fixed_block<byte> picture_buf(size + 2*size4); /* size for YUV 420 */
00479 
00480   AVFrame picture;
00481 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00482   avcodec_get_frame_defaults(&picture);
00483 #else
00484   {
00485     AVFrame* tmp = avcodec_alloc_frame();
00486     memcpy(&picture, tmp, sizeof(AVFrame));
00487     free(tmp);
00488   }
00489 #endif
00490 
00491   picture.data[0] = &picture_buf[0];
00492   picture.data[1] = &picture_buf[0] + size;
00493   picture.data[2] = &picture_buf[0] + size + size4;
00494   picture.linesize[0] = itsContext.width;
00495   picture.linesize[1] = (itsContext.width+1) / 2;
00496   picture.linesize[2] = (itsContext.width+1) / 2;
00497 
00498   if (img.getWidth() != itsContext.width ||
00499       img.getHeight() != itsContext.height)
00500     {
00501       LFATAL("wrong size mpeg output frame "
00502              "(expected %dx%d, got %dx%d)",
00503              itsContext.width, itsContext.height,
00504              img.getWidth(), img.getHeight());
00505     }
00506 
00507   rgb24_to_yv12_c(img,
00508                   picture.data[0],
00509                   picture.data[1],
00510                   picture.data[2]);
00511 
00512   this->writeRawFrame(&picture);
00513 }
00514 
00515 void FfmpegEncoder::writeVideoFrame(const VideoFrame& frame)
00516 {
00517 GVX_TRACE(__PRETTY_FUNCTION__);
00518 
00519   if (frame.getDims().w() != itsContext.width ||
00520       frame.getDims().h() != itsContext.height)
00521     {
00522       LFATAL("wrong size mpeg output frame "
00523              "(expected %dx%d, got %dx%d)",
00524              itsContext.width, itsContext.height,
00525              frame.getDims().w(), frame.getDims().h());
00526     }
00527 
00528   AVFrame picture;
00529 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
00530   avcodec_get_frame_defaults(&picture);
00531 #else
00532   {
00533     AVFrame* tmp = avcodec_alloc_frame();
00534     memcpy(&picture, tmp, sizeof(AVFrame));
00535     free(tmp);
00536   }
00537 #endif
00538 
00539   if (convertVideoFrameToAVFrame(frame,
00540                                  itsContext.pix_fmt,
00541                                  &picture))
00542     {
00543       this->writeRawFrame(&picture);
00544     }
00545   else
00546     {
00547       // OK, we couldn't do a direct conversion from
00548       // VideoFrame->AVFrame (probably the pixel formats didn't
00549       // match), so let's just fall back to RGB instead:
00550       this->writeRGB(frame.toRgb());
00551     }
00552 }
00553 
00554 void FfmpegEncoder::writeFrame(const GenericFrame& f)
00555 {
00556   if (f.nativeType() == GenericFrame::VIDEO)
00557     {
00558       this->writeVideoFrame(f.asVideo());
00559     }
00560   else
00561     {
00562       this->writeRGB(f.asRgb());
00563     }
00564 }
00565 
00566 #endif // INVT_HAVE_AVCODEC
00567 
00568 // ######################################################################
00569 /* So things look consistent in everyone's emacs... */
00570 /* Local Variables: */
00571 /* mode: c++ */
00572 /* indent-tabs-mode: nil */
00573 /* End: */
Generated on Sun May 8 08:41:00 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3