/*!@file Video/FfmpegFrame.C Conversions between ffmpeg's AVFrame and our VideoFrame */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Video/FfmpegFrame.C $
// $Id: FfmpegFrame.C 12785 2010-02-06 02:24:05Z irock $
//

#ifndef VIDEO_FFMPEGFRAME_C_DEFINED
#define VIDEO_FFMPEGFRAME_C_DEFINED

#ifdef INVT_HAVE_AVCODEC

#include "Video/FfmpegFrame.H"

#include "Image/Image.H"
#include "Image/Pixels.H"
#include "Image/color_conversions.H" // for yv12_to_rgb24_c
#include "Video/VideoFrame.H"
#include "rutz/trace.h"

#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 4718)
#  define HAVE_PIX_FMT_UYVY422
#endif

#if ! PIX_FMT_UYVY411
#define PIX_FMT_UYVY411 PIX_FMT_UYYVYY411
#endif

#if ! PIX_FMT_RGBA32
#define PIX_FMT_RGBA32 PIX_FMT_RGB32
#endif

#if ! PIX_FMT_YUV422
#define PIX_FMT_YUV422 PIX_FMT_YUYV422
#endif

namespace
{
  inline void copy_rows(byte* dst, const int dstlen,
                        const byte* src, const int srclen,
                        int h)
  {
    if (dstlen == srclen)
      {
        memcpy(dst, src, dstlen * h);
      }
    else
      {
        const int minlen = std::min(dstlen, srclen);
        for (int j = 0; j < h; ++j)
          {
            memcpy(dst, src, minlen); src += srclen; dst += dstlen;
          }
      }
  }
}

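// Editor's note (illustrative sketch, not exercised by the code in this
// file): ffmpeg may pad each row of an AVFrame plane for alignment, so
// pic->linesize[i] can be larger than the number of useful bytes per
// row.  copy_rows() collapses that padding into the tightly packed
// layout that VideoFrame expects.  For instance, assuming a decoder
// that aligns rows to 16 bytes, a 6-byte-wide luma row would come back
// with linesize[0] == 16, and
//
//   copy_rows(ydst, 6 /* dstlen */, pic->data[0], 16 /* srclen */, h);
//
// would copy only the first 6 bytes of each 16-byte source row.
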
#ifdef HAVE_PIX_FMT_UYVY422
// ######################################################################
static VideoFrame uyvy_to_VideoFrame(const AVFrame* pic,
                                     const PixelFormat fmt,
                                     const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_UYVY422);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(((w+1)/2)*h*4, 1), NO_INIT));

  const int dstlen = ((w+1)/2)*4, srclen = pic->linesize[0];

  copy_rows(hdl.uniq().dataw(), dstlen,
            pic->data[0], srclen, h);

  return VideoFrame(hdl, dims, VIDFMT_UYVY, false);
}
#endif

// ######################################################################
static VideoFrame yuv420p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_YUV420P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+1) / 2) * ((h+1) / 2) ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+1)/2) * ((h+1)/2);

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+1)/2), usrclen = pic->linesize[1];
  const int vdstlen = ((w+1)/2), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  // the chroma planes have ceil(h/2) rows, so copy (h+1)/2 of them; the
  // buffer allocated above is sized for that many rows
  copy_rows(udst, udstlen, usrc, usrclen, (h+1)/2);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, (h+1)/2);

  return VideoFrame(hdl, dims, VIDFMT_YUV420P, false);
}

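// Worked size check (editor's sketch): a planar 4:2:0 frame needs w*h
// luma bytes plus two chroma planes of ceil(w/2)*ceil(h/2) bytes each,
// which is what the allocation above computes.  For a 640x480 frame:
//
//   Y:     640 * 480                   = 307200 bytes
//   U, V:  ((640+1)/2) * ((480+1)/2)   =  76800 bytes each
//   total: 307200 + 2*76800            = 460800 bytes
//
// i.e. 1.5 bytes per pixel, as expected for YUV420P.
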
// ######################################################################
static VideoFrame yuv422p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  // this handler serves both the standard-range and the full-range
  // (jpeg) variants of planar 4:2:2; see convertAVFrameToVideoFrame()
  ASSERT(fmt == PIX_FMT_YUV422P || fmt == PIX_FMT_YUVJ422P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+1) / 2) * h ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+1)/2) * h;

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+1)/2), usrclen = pic->linesize[1];
  const int vdstlen = ((w+1)/2), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  copy_rows(udst, udstlen, usrc, usrclen, h);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, h);

  return VideoFrame(hdl, dims, VIDFMT_YUV422P, false);
}

// ######################################################################
static VideoFrame yuv411p_to_VideoFrame(const AVFrame* pic,
                                        const PixelFormat fmt,
                                        const Dims& dims)
{
  ASSERT(fmt == PIX_FMT_YUV411P);

  if (pic == 0 || dims.isEmpty())
    return VideoFrame();

  const int w = dims.w();
  const int h = dims.h();
  ArrayHandle<byte> hdl
    (new ArrayData<byte>(Dims(w * h + 2 * ( ((w+3) / 4) * h ),
                              1),
                         NO_INIT));
  byte* buf = hdl.uniq().dataw();

  const byte* ysrc = pic->data[0];
  const byte* usrc = pic->data[1];
  const byte* vsrc = pic->data[2];

  byte* ydst = buf;
  byte* udst = ydst + w * h;
  byte* vdst = udst + ((w+3)/4) * h;

  const int ydstlen = w, ysrclen = pic->linesize[0];
  const int udstlen = ((w+3)/4), usrclen = pic->linesize[1];
  const int vdstlen = ((w+3)/4), vsrclen = pic->linesize[2];

  copy_rows(ydst, ydstlen, ysrc, ysrclen, h);
  copy_rows(udst, udstlen, usrc, usrclen, h);
  copy_rows(vdst, vdstlen, vsrc, vsrclen, h);

  return VideoFrame(hdl, dims, VIDFMT_YUV411P, false);
}

// ######################################################################
VideoFormat convertAVPixelFormatToVideoFormat(const PixelFormat fmt)
{
  switch (fmt)
    {
#ifdef HAVE_PIX_FMT_UYVY422
    case PIX_FMT_UYVY422:  return VIDFMT_UYVY;
#endif
    case PIX_FMT_YUV420P:  return VIDFMT_YUV420P;
    case PIX_FMT_YUV422P:  return VIDFMT_YUV422P;
    case PIX_FMT_YUVJ422P: return VIDFMT_YUV422P;
    case PIX_FMT_YUV411P:  return VIDFMT_YUV411P;
    default:
      LFATAL("Oops! I don't know how to convert from "
             "PixelFormat %s (%d) to VideoFormat",
             convertToString(fmt).c_str(), int(fmt));
    }

  /* can't happen */ return VideoFormat(0);
}

// ######################################################################
VideoFrame convertAVFrameToVideoFrame(const AVFrame* pic,
                                      const PixelFormat fmt,
                                      const Dims& dims)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  switch (fmt)
    {
#ifdef HAVE_PIX_FMT_UYVY422
    case PIX_FMT_UYVY422:  return uyvy_to_VideoFrame(pic, fmt, dims);
#endif
    case PIX_FMT_YUV420P:  return yuv420p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUV422P:  return yuv422p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUVJ422P: return yuv422p_to_VideoFrame(pic, fmt, dims);
    case PIX_FMT_YUV411P:  return yuv411p_to_VideoFrame(pic, fmt, dims);
    default:
      LFATAL("Oops! I don't know how to convert from AVFrame with "
             "PixelFormat %s (%d) to VideoFrame",
             convertToString(fmt).c_str(), int(fmt));
    }

  /* can't happen */ return VideoFrame();
}

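// Usage sketch (editor's illustration; `ctx' and `packet' stand for an
// AVCodecContext* and an AVPacket obtained elsewhere, and a libavcodec
// recent enough to provide avcodec_decode_video2() is assumed):
//
//   AVFrame* picture = avcodec_alloc_frame();
//   int got_picture = 0;
//   avcodec_decode_video2(ctx, picture, &got_picture, &packet);
//   if (got_picture)
//     {
//       const VideoFrame frame =
//         convertAVFrameToVideoFrame(picture, ctx->pix_fmt,
//                                    Dims(ctx->width, ctx->height));
//       // ... hand `frame' off to the rest of the toolkit ...
//     }
//   av_free(picture);
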
// ######################################################################
Image<PixRGB<byte> > convertAVFrameToRGB(const AVFrame* pic,
                                         const PixelFormat fmt,
                                         const Dims& dims)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  if (fmt != PIX_FMT_YUV420P)
    return convertAVFrameToVideoFrame(pic, fmt, dims).toRgb();

  if (pic == 0 || dims.isEmpty())
    return Image<PixRGB<byte> >();

  Image<PixRGB<byte> > result(dims, ZEROS);

  yv12_to_rgb24_c(reinterpret_cast<byte*>(result.getArrayPtr()),
                  dims.w(),
                  pic->data[0],
                  pic->data[1],
                  pic->data[2],
                  pic->linesize[0],
                  pic->linesize[1],
                  dims.w(),
                  dims.h());

  return result;
}

// ######################################################################
bool convertVideoFrameToAVFrame(const VideoFrame& vidframe,
                                const PixelFormat fmt,
                                AVFrame* pic)
{
  GVX_TRACE(__PRETTY_FUNCTION__);

  // re-initialize the AVFrame
#if defined(INVT_FFMPEG_HAS_DEFAULTS_FUNCTIONS)
  avcodec_get_frame_defaults(pic);
#else
  {
    AVFrame* tmp = avcodec_alloc_frame();
    memcpy(pic, tmp, sizeof(AVFrame));
    // avcodec_alloc_frame() allocates with av_malloc(), so release the
    // temporary with av_free() rather than plain free()
    av_free(tmp);
  }
#endif

  switch (vidframe.getMode())
    {
    case VIDFMT_YUV420P:
      {
        if (fmt != PIX_FMT_YUV420P)
          {
            LDEBUG("Oops! I don't know how to convert from VideoFrame/%s "
                   "to AVFrame with PixelFormat '%d'",
                   convertToString(vidframe.getMode()).c_str(), int(fmt));
            return false;
          }

        const Dims dims = vidframe.getDims();
        byte* buf = const_cast<byte*>(vidframe.getBuffer());

        const int size = dims.w() * dims.h();
        // one chroma plane holds ceil(w/2)*ceil(h/2) bytes, matching the
        // layout produced by yuv420p_to_VideoFrame() above
        const int size4 = ((dims.w()+1)/2) * ((dims.h()+1)/2);

        pic->data[0] = buf;
        pic->data[1] = buf + size;
        pic->data[2] = buf + size + size4;
        pic->linesize[0] = dims.w();
        pic->linesize[1] = (dims.w()+1) / 2;
        pic->linesize[2] = (dims.w()+1) / 2;
      }
      break;

    default:
      {
        LDEBUG("Oops! I don't know how to convert from VideoFormat "
               "'%s' to AVFrame",
               convertToString(vidframe.getMode()).c_str());
        return false;
      }
      break;
    }

  return true;
}

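// Usage sketch for the encoding direction (editor's illustration;
// `ctx', `outbuf' and `outsize' are hypothetical, and the classic
// avcodec_encode_video() API is assumed):
//
//   AVFrame picture;
//   if (convertVideoFrameToAVFrame(vidframe, ctx->pix_fmt, &picture))
//     {
//       const int bytes =
//         avcodec_encode_video(ctx, outbuf, outsize, &picture);
//       // ... write the first `bytes' bytes of outbuf to the stream ...
//     }
//
// Note that the AVFrame merely points into the VideoFrame's buffer, so
// the VideoFrame must outlive any use of the AVFrame.
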
// ######################################################################
std::string convertToString(const PixelFormat fmt)
{
#define PFCASE(X) case X: return std::string(#X); break

  switch (fmt)
    {
      // from ffmpeg/avcodec.h
#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD >= 4753)
      PFCASE(PIX_FMT_NONE);
#endif
      PFCASE(PIX_FMT_YUV420P);   ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples)
      PFCASE(PIX_FMT_YUV422);    ///< Packed pixel, Y0 Cb Y1 Cr
      PFCASE(PIX_FMT_RGB24);     ///< Packed pixel, 3 bytes per pixel, RGBRGB...
      PFCASE(PIX_FMT_BGR24);     ///< Packed pixel, 3 bytes per pixel, BGRBGR...
      PFCASE(PIX_FMT_YUV422P);   ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples)
      PFCASE(PIX_FMT_YUV444P);   ///< Planar YUV 4:4:4 (1 Cr & Cb sample per 1x1 Y samples)
      PFCASE(PIX_FMT_RGBA32);    ///< Packed pixel, 4 bytes per pixel, BGRABGRA..., stored in cpu endianness
      PFCASE(PIX_FMT_YUV410P);   ///< Planar YUV 4:1:0 (1 Cr & Cb sample per 4x4 Y samples)
      PFCASE(PIX_FMT_YUV411P);   ///< Planar YUV 4:1:1 (1 Cr & Cb sample per 4x1 Y samples)
      PFCASE(PIX_FMT_RGB565);    ///< always stored in cpu endianness
      PFCASE(PIX_FMT_RGB555);    ///< always stored in cpu endianness, most significant bit to 1
      PFCASE(PIX_FMT_GRAY8);
      PFCASE(PIX_FMT_MONOWHITE); ///< 0 is white
      PFCASE(PIX_FMT_MONOBLACK); ///< 0 is black
      PFCASE(PIX_FMT_PAL8);      ///< 8 bit with RGBA palette
      PFCASE(PIX_FMT_YUVJ420P);  ///< Planar YUV 4:2:0 full scale (jpeg)
      PFCASE(PIX_FMT_YUVJ422P);  ///< Planar YUV 4:2:2 full scale (jpeg)
      PFCASE(PIX_FMT_YUVJ444P);  ///< Planar YUV 4:4:4 full scale (jpeg)
      PFCASE(PIX_FMT_XVMC_MPEG2_MC); ///< XVideo Motion Acceleration via common packet passing (xvmc_render.h)
      PFCASE(PIX_FMT_XVMC_MPEG2_IDCT);
#ifdef HAVE_PIX_FMT_UYVY422
      PFCASE(PIX_FMT_UYVY422);   ///< Packed pixel, Cb Y0 Cr Y1
#endif
#if defined(LIBAVCODEC_BUILD) && (LIBAVCODEC_BUILD > 4727)
      PFCASE(PIX_FMT_UYVY411);   ///< Packed pixel, Cb Y0 Y1 Cr Y2 Y3
#endif

#if defined(LIBAVUTIL_BUILD) && (LIBAVUTIL_BUILD > 3211264)
      PFCASE(PIX_FMT_BGR32);     ///< Packed RGB 8:8:8, 32bpp, (msb)8A 8B 8G 8R(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR565);    ///< Packed RGB 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR555);    ///< Packed RGB 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), in cpu endianness, most significant bit to 1
      PFCASE(PIX_FMT_BGR8);      ///< Packed RGB 3:3:2, 8bpp, (msb)2B 3G 3R(lsb)
      PFCASE(PIX_FMT_BGR4);      ///< Packed RGB 1:2:1, 4bpp, (msb)1B 2G 1R(lsb)
      PFCASE(PIX_FMT_BGR4_BYTE); ///< Packed RGB 1:2:1, 8bpp, (msb)1B 2G 1R(lsb)
      PFCASE(PIX_FMT_RGB8);      ///< Packed RGB 3:3:2, 8bpp, (msb)2R 3G 3B(lsb)
      PFCASE(PIX_FMT_RGB4);      ///< Packed RGB 1:2:1, 4bpp, (msb)1R 2G 1B(lsb)
      PFCASE(PIX_FMT_RGB4_BYTE); ///< Packed RGB 1:2:1, 8bpp, (msb)1R 2G 1B(lsb)
      PFCASE(PIX_FMT_NV12);      ///< Planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 for interleaved UV
      PFCASE(PIX_FMT_NV21);      ///< as above, but U and V bytes are swapped
      PFCASE(PIX_FMT_RGB32_1);   ///< Packed RGB 8:8:8, 32bpp, (msb)8R 8G 8B 8A(lsb), in cpu endianness
      PFCASE(PIX_FMT_BGR32_1);   ///< Packed RGB 8:8:8, 32bpp, (msb)8B 8G 8R 8A(lsb), in cpu endianness
#endif

      PFCASE(PIX_FMT_NB);

    default:
      return std::string("UNKNOWN");
    }

  /* can't happen */ return std::string();
}

#endif // INVT_HAVE_AVCODEC

// ######################################################################
/* So things look consistent in everyone's emacs... */
/* Local Variables: */
/* mode: c++ */
/* indent-tabs-mode: nil */
/* End: */

#endif // VIDEO_FFMPEGFRAME_C_DEFINED