/*!@file Video/FfmpegFrame.C Conversions between ffmpeg's AVFrame and our VideoFrame */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Video/FfmpegFrame.C $
// $Id: FfmpegFrame.C 12785 2010-02-06 02:24:05Z irock $
//

#ifndef VIDEO_FFMPEGFRAME_C_DEFINED
#define VIDEO_FFMPEGFRAME_C_DEFINED

#ifdef INVT_HAVE_AVCODEC

#include "Video/FfmpegFrame.H"

#include "Image/Image.H"
#include "Image/Pixels.H"
#include "Image/color_conversions.H" // for yv12_to_rgb24_c
#include "Video/VideoFrame.H"
#include "rutz/trace.h"

#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 4718)
#  define HAVE_PIX_FMT_UYVY422
#endif

#if ! PIX_FMT_UYVY411
#define PIX_FMT_UYVY411 PIX_FMT_UYYVYY411
#endif

#if ! PIX_FMT_RGBA32
#define PIX_FMT_RGBA32 PIX_FMT_RGB32
#endif

#if ! PIX_FMT_YUV422
#define PIX_FMT_YUV422 PIX_FMT_YUYV422
#endif

namespace
{
  inline void copy_rows(byte* dst, const int dstlen,
                        const byte* src, const int srclen,
                        int h)
  {
    if (dstlen == srclen)
      {
        memcpy(dst, src, dstlen * h);
      }
    else
      {
        const int minlen = std::min(dstlen, srclen);
        for (int j = 0; j < h; ++j)
          {
            memcpy(dst, src, minlen); src += srclen; dst += dstlen;
          }
      }
  }
}

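// Editor's note (illustrative sketch, not exercised by the code in this
// file): ffmpeg may pad each row of an AVFrame plane for alignment, so
// pic->linesize[i] can be larger than the number of useful bytes per
// row.  copy_rows() collapses that padding into the tightly packed
// layout that VideoFrame expects.  For instance, assuming a decoder
// that aligns rows to 16 bytes, a 6-byte-wide luma row would come back
// with linesize[0] == 16, and
//
//   copy_rows(ydst, 6 /* dstlen */, pic->data[0], 16 /* srclen */, h);
//
// would copy only the first 6 bytes of each 16-byte source row.
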
#ifdef HAVE_PIX_FMT_UYVY422
// ######################################################################
static VideoFrame uyvy_to_VideoFrame(const AVFrame* pic,
                                     const PixelFormat fmt,
                                     const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_UYVY422);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(((w+1)/2)*h*4, 1), NO_INIT));

  const int dstlen = ((w+1)/2)*4, srclen = pic->linesize[0];

  copy_rows(hdl.uniq().dataw(), dstlen,
            pic->data[0], srclen, h);

  return VideoFrame(hdl, dims, VIDFMT_UYVY, false);
}
#endif

// ######################################################################
static VideoFrame yuv420p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_YUV420P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+1) / 2) * ((h+1) / 2) ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+1)/2) * ((h+1)/2);

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+1)/2), usrclen = pic->linesize[1];
  const int vdstlen = ((w+1)/2), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  // the chroma planes have ceil(h/2) rows, so copy (h+1)/2 of them; the
  // buffer allocated above is sized for that many rows
  copy_rows(udst, udstlen, usrc, usrclen, (h+1)/2);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, (h+1)/2);

  return VideoFrame(hdl, dims, VIDFMT_YUV420P, false);
}

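// Worked size check (editor's sketch): a planar 4:2:0 frame needs w*h
// luma bytes plus two chroma planes of ceil(w/2)*ceil(h/2) bytes each,
// which is what the allocation above computes.  For a 640x480 frame:
//
//   Y:     640 * 480                   = 307200 bytes
//   U, V:  ((640+1)/2) * ((480+1)/2)   =  76800 bytes each
//   total: 307200 + 2*76800            = 460800 bytes
//
// i.e. 1.5 bytes per pixel, as expected for YUV420P.
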
// ######################################################################
static VideoFrame yuv422p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  // this handler serves both the standard-range and the full-range
  // (jpeg) variants of planar 4:2:2; see convertAVFrameToVideoFrame()
  ASSERT(fmt == PIX_FMT_YUV422P || fmt == PIX_FMT_YUVJ422P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+1) / 2) * h ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+1)/2) * h;

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+1)/2), usrclen = pic->linesize[1];
  const int vdstlen = ((w+1)/2), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  copy_rows(udst, udstlen, usrc, usrclen, h);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, h);

  return VideoFrame(hdl, dims, VIDFMT_YUV422P, false);
}

// ######################################################################
static VideoFrame yuv411p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_YUV411P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+3) / 4) * h ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+3)/4) * h;

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+3)/4), usrclen = pic->linesize[1];
  const int vdstlen = ((w+3)/4), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  copy_rows(udst, udstlen, usrc, usrclen, h);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, h);

  return VideoFrame(hdl, dims, VIDFMT_YUV411P, false);
}

// ######################################################################
VideoFormat convertAVPixelFormatToVideoFormat(const PixelFormat fmt)
{
  switch (fmt)
    {
#ifdef HAVE_PIX_FMT_UYVY422
    case PIX_FMT_UYVY422:  return VIDFMT_UYVY;
#endif
    case PIX_FMT_YUV420P:  return VIDFMT_YUV420P;
    case PIX_FMT_YUV422P:  return VIDFMT_YUV422P;
    case PIX_FMT_YUVJ422P: return VIDFMT_YUV422P;
    case PIX_FMT_YUV411P:  return VIDFMT_YUV411P;
    default:
      LFATAL("Oops! I don't know how to convert from "
             "PixelFormat %s (%d) to VideoFormat",
             convertToString(fmt).c_str(), int(fmt));
    }

  /* can't happen */ return VideoFormat(0);
}

// ######################################################################
VideoFrame convertAVFrameToVideoFrame(const AVFrame* pic,
                                      const PixelFormat fmt,
                                      const Dims& dims)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  switch (fmt)
    {
#ifdef HAVE_PIX_FMT_UYVY422
    case PIX_FMT_UYVY422:  return uyvy_to_VideoFrame(pic, fmt, dims);
#endif
    case PIX_FMT_YUV420P:  return yuv420p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUV422P:  return yuv422p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUVJ422P: return yuv422p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUV411P:  return yuv411p_to_VideoFrame(pic, fmt, dims);
    default:
      LFATAL("Oops! I don't know how to convert from AVFrame with "
             "PixelFormat %s (%d) to VideoFrame",
             convertToString(fmt).c_str(), int(fmt));
    }

  /* can't happen */ return VideoFrame();
}

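// Usage sketch (editor's illustration; `ctx' and `packet' stand for an
// AVCodecContext* and an AVPacket obtained elsewhere, and a libavcodec
// recent enough to provide avcodec_decode_video2() is assumed):
//
//   AVFrame* picture = avcodec_alloc_frame();
//   int got_picture = 0;
//   avcodec_decode_video2(ctx, picture, &got_picture, &packet);
//   if (got_picture)
//     {
//       const VideoFrame frame =
//         convertAVFrameToVideoFrame(picture, ctx->pix_fmt,
//                                    Dims(ctx->width, ctx->height));
//       // ... hand `frame' off to the rest of the toolkit ...
//     }
//   av_free(picture);
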
// ######################################################################
Image<PixRGB<byte> > convertAVFrameToRGB(const AVFrame* pic,
                                         const PixelFormat fmt,
                                         const Dims& dims)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  if (fmt != PIX_FMT_YUV420P)
    return convertAVFrameToVideoFrame(pic, fmt, dims).toRgb();

  if (pic == 0 || dims.isEmpty())
    return Image<PixRGB<byte> >();

  Image<PixRGB<byte> > result(dims, ZEROS);

  yv12_to_rgb24_c(reinterpret_cast<byte*>(result.getArrayPtr()),
                  dims.w(),
                  pic->data[0],
                  pic->data[1],
                  pic->data[2],
                  pic->linesize[0],
                  pic->linesize[1],
                  dims.w(),
                  dims.h());

  return result;
}

// ######################################################################
bool convertVideoFrameToAVFrame(const VideoFrame& vidframe,
                                const PixelFormat fmt,
                                AVFrame* pic)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  // re-initialize the AVFrame
#if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
  avcodec_get_frame_defaults(pic);
#else
  {
    AVFrame* tmp = avcodec_alloc_frame();
    memcpy(pic, tmp, sizeof(AVFrame));
    // avcodec_alloc_frame() allocates with av_malloc(), so release the
    // temporary with av_free() rather than plain free()
    av_free(tmp);
  }
#endif

  switch (vidframe.getMode())
    {
    case VIDFMT_YUV420P:
      {
        if (fmt != PIX_FMT_YUV420P)
          {
            LDEBUG("Oops! I don't know how to convert from VideoFrame/%s "
                   "to AVFrame with PixelFormat '%d'",
                   convertToString(vidframe.getMode()).c_str(), int(fmt));
            return false;
          }

        const Dims dims = vidframe.getDims();
        byte* buf = const_cast<byte*>(vidframe.getBuffer());

        const int size = dims.w() * dims.h();
        // one chroma plane holds ceil(w/2)*ceil(h/2) bytes, matching the
        // layout produced by yuv420p_to_VideoFrame() above
        const int size4 = ((dims.w()+1)/2) * ((dims.h()+1)/2);

        pic->data[0] = buf;
        pic->data[1] = buf + size;
        pic->data[2] = buf + size + size4;
        pic->linesize[0] = dims.w();
        pic->linesize[1] = (dims.w()+1) / 2;
        pic->linesize[2] = (dims.w()+1) / 2;
      }
      break;

    default:
      {
        LDEBUG("Oops! I don't know how to convert from VideoFormat "
               "'%s' to AVFrame",
               convertToString(vidframe.getMode()).c_str());
        return false;
      }
      break;
    }

  return true;
}

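// Usage sketch for the encoding direction (editor's illustration;
// `ctx', `outbuf' and `outsize' are hypothetical, and the classic
// avcodec_encode_video() API is assumed):
//
//   AVFrame picture;
//   if (convertVideoFrameToAVFrame(vidframe, ctx->pix_fmt, &picture))
//     {
//       const int bytes =
//         avcodec_encode_video(ctx, outbuf, outsize, &picture);
//       // ... write the first `bytes' bytes of outbuf to the stream ...
//     }
//
// Note that the AVFrame merely points into the VideoFrame's buffer, so
// the VideoFrame must outlive any use of the AVFrame.
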
// ######################################################################
std::string convertToString(const PixelFormat fmt)
{
#define PFCASE(X) case X: return std::string(#X); break

  switch (fmt)
    {
      // from ffmpeg/avcodec.h
#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4753)
      PFCASE(PIX_FMT_NONE);
#endif
      PFCASE(PIX_FMT_YUV420P);   ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples)
      PFCASE(PIX_FMT_YUV422);    ///< Packed pixel, Y0 Cb Y1 Cr
      PFCASE(PIX_FMT_RGB24);     ///< Packed pixel, 3 bytes per pixel, RGBRGB...
      PFCASE(PIX_FMT_BGR24);     ///< Packed pixel, 3 bytes per pixel, BGRBGR...
      PFCASE(PIX_FMT_YUV422P);   ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples)
      PFCASE(PIX_FMT_YUV444P);   ///< Planar YUV 4:4:4 (1 Cr & Cb sample per 1x1 Y samples)
      PFCASE(PIX_FMT_RGBA32);    ///< Packed pixel, 4 bytes per pixel, BGRABGRA..., stored in cpu endianness
      PFCASE(PIX_FMT_YUV410P);   ///< Planar YUV 4:1:0 (1 Cr & Cb sample per 4x4 Y samples)
      PFCASE(PIX_FMT_YUV411P);   ///< Planar YUV 4:1:1 (1 Cr & Cb sample per 4x1 Y samples)
      PFCASE(PIX_FMT_RGB565);    ///< always stored in cpu endianness
      PFCASE(PIX_FMT_RGB555);    ///< always stored in cpu endianness, most significant bit to 1
      PFCASE(PIX_FMT_GRAY8);
      PFCASE(PIX_FMT_MONOWHITE); ///< 0 is white
      PFCASE(PIX_FMT_MONOBLACK); ///< 0 is black
      PFCASE(PIX_FMT_PAL8);      ///< 8 bit with RGBA palette
      PFCASE(PIX_FMT_YUVJ420P);  ///< Planar YUV 4:2:0 full scale (jpeg)
      PFCASE(PIX_FMT_YUVJ422P);  ///< Planar YUV 4:2:2 full scale (jpeg)
      PFCASE(PIX_FMT_YUVJ444P);  ///< Planar YUV 4:4:4 full scale (jpeg)
      PFCASE(PIX_FMT_XVMC_MPEG2_MC); ///< XVideo Motion Acceleration via common packet passing (xvmc_render.h)
      PFCASE(PIX_FMT_XVMC_MPEG2_IDCT);
#ifdef HAVE_PIX_FMT_UYVY422
      PFCASE(PIX_FMT_UYVY422);   ///< Packed pixel, Cb Y0 Cr Y1
#endif
#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 4727)
      PFCASE(PIX_FMT_UYVY411);   ///< Packed pixel, Cb Y0 Y1 Cr Y2 Y3
#endif

#if defined(LIBAVUTIL_BUILD) && (LIBAVUTIL_BUILD > 3211264)
      PFCASE(PIX_FMT_BGR32);     ///< Packed RGB 8:8:8, 32bpp, (msb)8A 8B 8G 8R(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR565);    ///< Packed RGB 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR555);    ///< Packed RGB 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), in cpu endianness, most significant bit to 1
      PFCASE(PIX_FMT_BGR8);      ///< Packed RGB 3:3:2, 8bpp, (msb)2B 3G 3R(lsb)
      PFCASE(PIX_FMT_BGR4);      ///< Packed RGB 1:2:1, 4bpp, (msb)1B 2G 1R(lsb)
      PFCASE(PIX_FMT_BGR4_BYTE); ///< Packed RGB 1:2:1, 8bpp, (msb)1B 2G 1R(lsb)
      PFCASE(PIX_FMT_RGB8);      ///< Packed RGB 3:3:2, 8bpp, (msb)2R 3G 3B(lsb)
      PFCASE(PIX_FMT_RGB4);      ///< Packed RGB 1:2:1, 4bpp, (msb)1R 2G 1B(lsb)
      PFCASE(PIX_FMT_RGB4_BYTE); ///< Packed RGB 1:2:1, 8bpp, (msb)1R 2G 1B(lsb)
      PFCASE(PIX_FMT_NV12);      ///< Planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 for interleaved UV
      PFCASE(PIX_FMT_NV21);      ///< as above, but U and V bytes are swapped
      PFCASE(PIX_FMT_RGB32_1);   ///< Packed RGB 8:8:8, 32bpp, (msb)8R 8G 8B 8A(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR32_1);   ///< Packed RGB 8:8:8, 32bpp, (msb)8B 8G 8R 8A(lsb), in cpu endianness
#endif

      PFCASE(PIX_FMT_NB);

    default:
      return std::string("UNKNOWN");
    }

  /* can't happen */ return std::string();
}

#endif // INVT_HAVE_AVCODEC

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* mode: c++ */
/* indent-tabs-mode: nil */
/* End: */

#endif // VIDEO_FFMPEGFRAME_C_DEFINED