00001 /*!@file Media/FfmpegEncoder.C Low-level class for using ffmpeg to decode movie files */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Media/FfmpegEncoder.C $ 00035 // $Id: FfmpegEncoder.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 00039 #include "Media/FfmpegEncoder.H" 00040 00041 #include "Image/Image.H" 00042 #include "Image/Pixels.H" 00043 #include "Image/color_conversions.H" // for rgb24_to_yv12_c() 00044 #include "Raster/GenericFrame.H" 00045 #include "Util/log.H" 00046 #include "Video/FfmpegFrame.H" 00047 #include "Video/VideoFrame.H" 00048 #include "rutz/arrays.h" 00049 #include "rutz/trace.h" 00050 00051 #ifdef INVT_HAVE_AVCODEC 00052 00053 // ###################################################################### 00054 FfmpegEncoder::FfmpegEncoder(const std::string& fname, 00055 const std::string& codecname, 00056 const int bitrate, 00057 const int framerate, 00058 const int frameratebase, 00059 const Dims& dims, 00060 const int bufsz, 00061 const bool useFormatContext) 00062 : 00063 itsFile(0), 00064 itsContext(), 00065 itsFormatContext(0), 00066 itsFrameNumber(0), 00067 itsOutbufSize(bufsz), 00068 itsFrameSizeRange(), 00069 itsUseFormatContext(useFormatContext) 00070 { 00071 GVX_TRACE(__PRETTY_FUNCTION__); 00072 00073 // no need to guard these functions for being called multiple times; 00074 // they all have internal guards 00075 av_register_all(); 00076 avcodec_init(); 00077 avcodec_register_all(); 00078 00079 AVOutputFormat* oformat = NULL; 00080 if (codecname.compare("List") == 0) { // list available codecs 00081 LINFO("##### Available output codecs (not all may work for video):"); 00082 for(AVOutputFormat* f = first_oformat; f != NULL; f = f->next) 00083 LINFO("%s: %s %d", f->name, f->long_name, f->flags); 00084 LFATAL("Please select a codec from this list"); 00085 } else { // format is given 00086 // no av_find_output_format()?? let's do it by hand... 00087 for(AVOutputFormat* f = first_oformat; f != NULL; f = f->next) 00088 if (codecname.compare(f->name) == 0) 00089 { oformat = f; break; } 00090 } 00091 00092 if (oformat == 0) 00093 LFATAL("No such video codec '%s';\n" 00094 "try re-running with --output-codec=List to see a list\n" 00095 "of available codecs", codecname.c_str()); 00096 00097 char ext[100]; ext[0] = '.'; uint i; 00098 for (i = 0; i < strlen(oformat->extensions); i ++) 00099 if (oformat->extensions[i] == ',') break; 00100 else ext[i+1] = oformat->extensions[i]; 00101 ext[i+1] = '\0'; 00102 LINFO("Using output format '%s' (%s), extension %s", oformat->name, 00103 oformat->long_name, ext); 00104 00105 std::string oname(fname); 00106 std::string::size_type idx1 = oname.rfind('/', oname.npos); 00107 std::string::size_type idx2 = oname.rfind('.', oname.npos); 00108 // must check that idx2 is valid; otherwise if we do 00109 // oname.erase(idx2) with e.g. idx2==npos then we will get a 00110 // std::out_of_range exception 00111 if (idx2 < oname.size() && idx2 > idx1) 00112 oname.erase(idx2, oname.npos); 00113 oname.append(ext); 00114 LINFO("Output file: %s", oname.c_str()); 00115 00116 if (itsUseFormatContext) 00117 { 00118 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS 00119 LINFO("Using FormatContext to output data"); 00120 itsFormatContext = av_alloc_format_context(); 00121 if (!itsFormatContext) 00122 LFATAL("Cannot allocate format context"); 00123 itsFormatContext->oformat = oformat; 00124 00125 itsAVStream = av_new_stream(itsFormatContext, 0); 00126 if (!itsAVStream) 00127 LFATAL("Can not allocate AVStream"); 00128 #else 00129 LFATAL("Need a new version of ffmpeg libs for this option"); 00130 itsFormatContext = NULL; 00131 #endif 00132 } 00133 00134 AVCodec* const codec = avcodec_find_encoder(oformat->video_codec); 00135 if (codec == NULL) LFATAL("codec not found"); 00136 00137 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS) 00138 avcodec_get_context_defaults(&itsContext); 00139 #else 00140 { 00141 AVCodecContext* const tmp = avcodec_alloc_context(); 00142 memcpy(&itsContext, tmp, sizeof(AVCodecContext)); 00143 free(tmp); 00144 } 00145 #endif 00146 00147 itsContext.bit_rate = bitrate; 00148 00149 // Be sure to set itsContext.pix_fmt -- it may occasionally 00150 // appear to work to leave pix_fmt unset, because the value we want, 00151 // PIX_FMT_YUV420P, has the enum value of 0, so if the uninitialized 00152 // memory for pix_fmt happens to have the value 0, then we'll slip 00153 // through without setting it explicitly. 00154 itsContext.pix_fmt = PIX_FMT_YUV420P; 00155 00156 /* resolution must be a multiple of two */ 00157 itsContext.width = dims.w(); 00158 itsContext.height = dims.h(); 00159 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE) 00160 AVRational time_base = { frameratebase, framerate }; 00161 itsContext.time_base = time_base; 00162 const int frb = frameratebase; 00163 #elif LIBAVCODEC_VERSION_INT >= 0x000406 && LIBAVCODEC_BUILD > 4665 00164 itsContext.frame_rate = framerate; 00165 const int frb = frameratebase; 00166 itsContext.frame_rate_base = frb; 00167 #else 00168 itsContext.frame_rate = framerate; 00169 const int frb = FRAME_RATE_BASE; 00170 #endif 00171 itsContext.gop_size = 10; /* emit one intra frame every ten frames */ 00172 00173 if(codec->id != CODEC_ID_MPEG4 && 00174 codec->id != CODEC_ID_MPEG1VIDEO && 00175 codec->id != CODEC_ID_MPEG2VIDEO) 00176 itsContext.max_b_frames = 0; 00177 else 00178 itsContext.max_b_frames = 1; 00179 00180 itsFrameNumber = 0; 00181 00182 LINFO("using max_b_frames=%i bitrate=%u width=%u height=%u framerate=%u frameratebase=%u", 00183 itsContext.max_b_frames, itsContext.bit_rate, itsContext.width, itsContext.height, framerate, frb); 00184 00185 if (avcodec_open(&itsContext, codec) < 0) 00186 LFATAL("could not open codec\n"); 00187 00188 if (itsUseFormatContext) 00189 { 00190 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS 00191 AVCodecContext *c = itsAVStream->codec; 00192 c->codec_id = itsContext.codec_id; 00193 c->codec_type = CODEC_TYPE_VIDEO; 00194 00195 /* put sample parameters */ 00196 c->bit_rate = itsContext.bit_rate; 00197 /* resolution must be a multiple of two */ 00198 c->width = itsContext.width; 00199 c->height = itsContext.height; 00200 /* time base: this is the fundamental unit of time (in seconds) in terms 00201 of which frame timestamps are represented. for fixed-fps content, 00202 timebase should be 1/framerate and timestamp increments should be 00203 identically 1. */ 00204 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE) 00205 c->time_base.den = itsContext.time_base.den; 00206 c->time_base.num = itsContext.time_base.num; 00207 #endif 00208 c->gop_size = 12; /* emit one intra frame every twelve frames at most */ 00209 c->pix_fmt = itsContext.pix_fmt; 00210 00211 /* set the output parameters (must be done even if no 00212 parameters). */ 00213 if (av_set_parameters(itsFormatContext, NULL) < 0) 00214 LFATAL("Invalid output format parameters"); 00215 00216 #if defined(INVT_FFMPEG_URL_OPEN_FUNC_TAKES_SINGLE_POINTER) 00217 00218 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER) 00219 if (url_fopen(itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0) 00220 LFATAL("Could not open '%s'", oname.c_str()); 00221 #else 00222 if (url_fopen(&itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0) 00223 LFATAL("Could not open '%s'", oname.c_str()); 00224 #endif 00225 00226 #else 00227 00228 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER) 00229 if (url_fopen(&itsFormatContext->pb, oname.c_str(), URL_WRONLY) < 0) 00230 LFATAL("Could not open '%s'", oname.c_str()); 00231 #else 00232 LFATAL("Could not open '%s' ffmpeg version mismatch", oname.c_str()); 00233 #endif 00234 00235 #endif //INVT_FFMPEG_URL_OPEN_FUNC_TAKES_SINGLE_POINTER) 00236 00237 00238 00239 /* write the stream header, if any */ 00240 av_write_header(itsFormatContext); 00241 #else 00242 LFATAL("Need a new version of FFMPEG for this option"); 00243 #endif 00244 } else { 00245 itsFile = fopen(oname.c_str(), "w"); 00246 if (itsFile==NULL) 00247 LFATAL("could not open file! %s", oname.c_str()); 00248 } 00249 00250 LINFO("EnCoder Inited"); 00251 } 00252 00253 FfmpegEncoder::~FfmpegEncoder() 00254 { 00255 close(); 00256 } 00257 00258 int FfmpegEncoder::close() 00259 { 00260 GVX_TRACE(__PRETTY_FUNCTION__); 00261 00262 if (itsUseFormatContext) 00263 { 00264 //if we went through this function already, then all the memory is freed 00265 if (itsFormatContext == NULL) 00266 return 0; 00267 } else { 00268 if (itsFile == NULL) 00269 return 0; 00270 } 00271 00272 00273 // (1) write any "delayed frames" 00274 { 00275 byte* const outbuf = (byte*) calloc(itsOutbufSize, 1); 00276 00277 if (outbuf != 0) 00278 { 00279 while (true) 00280 { 00281 LINFO("pre frame number %d", itsContext.frame_number); 00282 00283 const int out_size = 00284 avcodec_encode_video(&itsContext, outbuf, 00285 itsOutbufSize, NULL); 00286 00287 if (out_size <= 0) 00288 break; 00289 00290 itsFrameSizeRange.merge(out_size); 00291 00292 if (itsUseFormatContext) 00293 { 00294 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS 00295 if (out_size > 0) 00296 { 00297 AVPacket pkt; 00298 av_init_packet(&pkt); 00299 00300 #if defined(INVT_FFMPEG_AVCODECCONTEXT_HAS_TIME_BASE) 00301 pkt.pts= av_rescale_q(itsContext.coded_frame->pts, 00302 itsContext.time_base, itsAVStream->time_base); 00303 #endif 00304 if(itsContext.coded_frame->key_frame) 00305 pkt.flags |= PKT_FLAG_KEY; 00306 pkt.stream_index= itsAVStream->index; 00307 pkt.data= outbuf; 00308 pkt.size= out_size; 00309 00310 /* write the compressed frame in the media file */ 00311 av_write_frame(itsFormatContext, &pkt); 00312 } 00313 #else 00314 LFATAL("Need a new version of ffmpeg for this option"); 00315 #endif 00316 } else { 00317 fwrite(outbuf, 1, out_size, itsFile); 00318 } 00319 00320 LINFO("post frame number %d", itsContext.frame_number); 00321 LINFO("delayed frame (out_size=%d)", out_size); 00322 } 00323 00324 free(outbuf); 00325 } 00326 } 00327 00328 if (!itsUseFormatContext) 00329 { 00330 //(2) add sequence end code 00331 { 00332 char outbuf[8]; 00333 outbuf[0] = 0x00; 00334 outbuf[1] = 0x00; 00335 outbuf[2] = 0x01; 00336 outbuf[3] = 0xb7; 00337 fwrite(outbuf, 1, 4, itsFile); 00338 } 00339 } 00340 00341 00342 00343 LINFO("end encoder: wrote %d frames, itsFrameSizeRange=[%d..%d]", 00344 itsFrameNumber, itsFrameSizeRange.min(), itsFrameSizeRange.max()); 00345 00346 if (itsUseFormatContext) 00347 { 00348 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS 00349 avcodec_close(&itsContext); 00350 00351 av_write_trailer(itsFormatContext); 00352 00353 /* free the streams */ 00354 for(uint i = 0; i < (uint)itsFormatContext->nb_streams; i++) { 00355 av_freep(&itsFormatContext->streams[i]->codec); 00356 av_freep(&itsFormatContext->streams[i]); 00357 } 00358 00359 #if defined(INVT_FFMPEG_AVFORMATCONTEXT_BYTEIO_ISPOINTER) 00360 url_fclose(itsFormatContext->pb); 00361 #else 00362 url_fclose(&itsFormatContext->pb); 00363 #endif 00364 00365 av_free(itsFormatContext); 00366 itsFormatContext = NULL; 00367 #else 00368 LFATAL("Need a new version of ffmpeg for this option"); 00369 #endif 00370 } else { 00371 fclose(itsFile); 00372 itsFile = NULL; 00373 } 00374 00375 return 0; 00376 } 00377 00378 void FfmpegEncoder::writeRawFrame(const AVFrame* picture) 00379 { 00380 GVX_TRACE(__PRETTY_FUNCTION__); 00381 00382 // FIXME We'd like to have a way to either (1) compute what the 00383 // maximum necessary itsOutbufSize would be for our given 00384 // framerate+bitrate, or (2) get a chance to retry writing a given 00385 // frame if it is truncated. However, we have no programmatic way of 00386 // knowing whether a given frame gets truncated (all we see is that 00387 // ffmpeg prints "encoded frame too large" on stderr), but even then 00388 // the return value from avcodec_encode_video() is less than our 00389 // itsOutbufSize (although for a "too large" frame we can see 00390 // that the return value is clearly higher than usual). Also, it's 00391 // hard to determine a hard upper limit on the bufsize, even given 00392 // the framerate and bitrate, because the bitrate is only achieved 00393 // on /average/ -- so, any particular frame might be much larger 00394 // (e.g., 10x or 100x) than the average frame size. So, given all 00395 // that, our current approach is just to leave the buffer size up to 00396 // the user via the --output-mpeg-bufsize command-line option. 00397 00398 // NOTE: it might seem extravagent to allocate+deallocate these 00399 // buffers (outbuf, and picture_buf in writeRGB()) for every single 00400 // frame that is written; however, profiling shows that this 00401 // accounts for only about 2% of the total time spent in 00402 // writeFrame(). The alternatives, both with their own 00403 // disadvantages, would be (1) have separate buffers allocated once 00404 // per object; however this would be expensive in overall memory 00405 // usage if we had multiple mpeg streams open at once; or (2) have 00406 // static buffers shared by all objects; however, this would require 00407 // some form of between-object synchronization in the case of 00408 // multi-threading which could be cpu-expensive both for the 00409 // locking+unlocking and would also waste time waiting to acquire 00410 // the lock for access to the shared buffers. 00411 00412 rutz::fixed_block<byte> outbuf(itsOutbufSize); 00413 00414 const int out_size = avcodec_encode_video(&itsContext, 00415 &outbuf[0], 00416 outbuf.size(), 00417 picture); 00418 00419 if (out_size < 0) 00420 LFATAL("error during avcodec_encode_video()"); 00421 00422 if (out_size > 0) 00423 { 00424 itsFrameSizeRange.merge(out_size); 00425 00426 if (itsUseFormatContext) 00427 { 00428 #ifdef INVT_FFMPEG_HAS_FORMATCONTEXT_FUNCTIONS 00429 AVPacket pkt; 00430 av_init_packet(&pkt); 00431 00432 pkt.pts= av_rescale_q(itsContext.coded_frame->pts, 00433 itsContext.time_base, itsAVStream->time_base); 00434 if(itsContext.coded_frame->key_frame) 00435 pkt.flags |= PKT_FLAG_KEY; 00436 pkt.stream_index= itsAVStream->index; 00437 pkt.data= &outbuf[0]; 00438 pkt.size= out_size; 00439 00440 /* write the compressed frame in the media file */ 00441 av_write_frame(itsFormatContext, &pkt); 00442 #else 00443 LFATAL("New a new version of ffmpeg for this option"); 00444 #endif 00445 } else { 00446 fwrite(&outbuf[0], 1, out_size, itsFile); 00447 } 00448 } 00449 00450 LDEBUG("itsOutbufSize=%d, out_size=%d, frameSizeRange=[%d..%d]", 00451 itsOutbufSize, out_size, 00452 itsFrameSizeRange.min(), itsFrameSizeRange.max()); 00453 00454 LDEBUG("encoded frame [zero-based] %d (%d delayed frames pending)", 00455 itsFrameNumber, 00456 // to compute the number of pending "delayed frames", we 00457 // subtract the AVCodecContext's frame number from our own, 00458 // except that there is an offset of 2 -- one because 00459 // AVCodecContext counts from 1, while we count from zero, and 00460 // another because AVCodecContext's counter reports the number 00461 // of the NEXT frame to be written, while itsFrameNumber is 00462 // the number of the frame that has just been written 00463 itsFrameNumber - (itsContext.frame_number - 2)); 00464 00465 ++itsFrameNumber; 00466 } 00467 00468 void FfmpegEncoder::writeRGB(const Image<PixRGB<byte> >& img) 00469 { 00470 GVX_TRACE(__PRETTY_FUNCTION__); 00471 00472 ASSERT(PIX_FMT_YUV420P == itsContext.pix_fmt); 00473 00474 const int size = itsContext.width * itsContext.height; 00475 const int size4 = 00476 ((itsContext.width+1)/2) * (itsContext.height/2); 00477 00478 rutz::fixed_block<byte> picture_buf(size + 2*size4); /* size for YUV 420 */ 00479 00480 AVFrame picture; 00481 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS) 00482 avcodec_get_frame_defaults(&picture); 00483 #else 00484 { 00485 AVFrame* tmp = avcodec_alloc_frame(); 00486 memcpy(&picture, tmp, sizeof(AVFrame)); 00487 free(tmp); 00488 } 00489 #endif 00490 00491 picture.data[0] = &picture_buf[0]; 00492 picture.data[1] = &picture_buf[0] + size; 00493 picture.data[2] = &picture_buf[0] + size + size4; 00494 picture.linesize[0] = itsContext.width; 00495 picture.linesize[1] = (itsContext.width+1) / 2; 00496 picture.linesize[2] = (itsContext.width+1) / 2; 00497 00498 if (img.getWidth() != itsContext.width || 00499 img.getHeight() != itsContext.height) 00500 { 00501 LFATAL("wrong size mpeg output frame " 00502 "(expected %dx%d, got %dx%d)", 00503 itsContext.width, itsContext.height, 00504 img.getWidth(), img.getHeight()); 00505 } 00506 00507 rgb24_to_yv12_c(img, 00508 picture.data[0], 00509 picture.data[1], 00510 picture.data[2]); 00511 00512 this->writeRawFrame(&picture); 00513 } 00514 00515 void FfmpegEncoder::writeVideoFrame(const VideoFrame& frame) 00516 { 00517 GVX_TRACE(__PRETTY_FUNCTION__); 00518 00519 if (frame.getDims().w() != itsContext.width || 00520 frame.getDims().h() != itsContext.height) 00521 { 00522 LFATAL("wrong size mpeg output frame " 00523 "(expected %dx%d, got %dx%d)", 00524 itsContext.width, itsContext.height, 00525 frame.getDims().w(), frame.getDims().h()); 00526 } 00527 00528 AVFrame picture; 00529 #if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS) 00530 avcodec_get_frame_defaults(&picture); 00531 #else 00532 { 00533 AVFrame* tmp = avcodec_alloc_frame(); 00534 memcpy(&picture, tmp, sizeof(AVFrame)); 00535 free(tmp); 00536 } 00537 #endif 00538 00539 if (convertVideoFrameToAVFrame(frame, 00540 itsContext.pix_fmt, 00541 &picture)) 00542 { 00543 this->writeRawFrame(&picture); 00544 } 00545 else 00546 { 00547 // OK, we couldn't do a direct conversion from 00548 // VideoFrame->AVFrame (probably the pixel formats didn't 00549 // match), so let's just fall back to RGB instead: 00550 this->writeRGB(frame.toRgb()); 00551 } 00552 } 00553 00554 void FfmpegEncoder::writeFrame(const GenericFrame& f) 00555 { 00556 if (f.nativeType() == GenericFrame::VIDEO) 00557 { 00558 this->writeVideoFrame(f.asVideo()); 00559 } 00560 else 00561 { 00562 this->writeRGB(f.asRgb()); 00563 } 00564 } 00565 00566 #endif // HAVE_FFMPEG_AVCODEC_H 00567 00568 // ###################################################################### 00569 /* So things look consistent in everyone's emacs... */ 00570 /* Local Variables: */ 00571 /* mode: c++ */ 00572 /* indent-tabs-mode: nil */ 00573 /* End: */