Upgrade to PRO for Only $50/Year—Limited-Time Offer! 🔥

Video decoding with FFmpeg on iOS and Android

Aman Karmani
October 17, 2018

Video decoding with FFmpeg on iOS and Android

A look at how FFmpeg wraps and exposes the hardware encoding and decoding APIs available on modern phones and tablets running iOS or Android

from Demuxed 2018

Aman Karmani

October 17, 2018
Tweet

More Decks by Aman Karmani

Other Decks in Technology

Transcript

  1. Demuxed 2018 @tmm1 Video decoding with FFmpeg (libavformat, libavcodec, libavutil,

    libavfilter) on iOS (VideoToolbox: Apple TV, iPhone, iPad) and Android (MediaCodec: Android TV, Fire TV)
  2. Aman Gupta @tmm1 @tmm1 [email protected] Channels app founder 2015 GitHub

    Employee #18 2011 ruby-core committer 2013 FFmpeg committer 2017
  3. FFmpeg libavformat protocols http, tcp, rtmp, udp, … muxers +

    demuxers hls, dash, mpegts, … data stream → AVPacket* libavcodec
  4. FFmpeg libavformat protocols http, tcp, rtmp, udp, … muxers +

    demuxers hls, dash, mpegts, … data stream → AVPacket* libavcodec encoders decoders + hwaccels AVFrame* → AVPacket* AVPacket* → AVFrame* mpeg2, h264, hevc, …
  5. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); { find decoder and create instance { prepare decoder for data
  6. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec); AVBufferRef *device_ref = NULL; av_hwdevice_ctx_create( &device_ref, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); avctx->hw_device_ctx = device_ref; enum AVPixelFormat get_vt_format(struct AVCodecContext *avctx, const enum AVPixelFormat *fmt) { return AV_PIX_FMT_VIDEOTOOLBOX; } avctx->get_format = get_vt_format; avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: VIDEOTOOLBOX
  7. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec); AVBufferRef *device_ref = NULL; av_hwdevice_ctx_create( &device_ref, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); avctx->hw_device_ctx = device_ref; enum AVPixelFormat get_vt_format(struct AVCodecContext *avctx, const enum AVPixelFormat *fmt) { return AV_PIX_FMT_VIDEOTOOLBOX; } avctx->get_format = get_vt_format; avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: VIDEOTOOLBOX { create and attach HWDEVICE context
  8. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec); AVBufferRef *device_ref = NULL; av_hwdevice_ctx_create( &device_ref, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); avctx->hw_device_ctx = device_ref; enum AVPixelFormat get_vt_format(struct AVCodecContext *avctx, const enum AVPixelFormat *fmt) { return AV_PIX_FMT_VIDEOTOOLBOX; } avctx->get_format = get_vt_format; avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: VIDEOTOOLBOX { request videotoolbox pixel format
  9. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); { receive decoded frame
  10. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); HARDWARE FRAMES: VIDEOTOOLBOX
  11. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); HARDWARE FRAMES: VIDEOTOOLBOX { platform specific pixel format { generic pixel format
  12. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); CVPixelBufferRef img = (CVPixelBufferRef)frame->planes[3]; HARDWARE FRAMES: VIDEOTOOLBOX { unwrap to access iOS pixel buffer
  13. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); CVPixelBufferRef img = (CVPixelBufferRef)frame->planes[3]; HARDWARE FRAMES: VIDEOTOOLBOX { unwrap to access iOS pixel buffer // use pixel buffer to: // - render to UIImage on screen // - read video pixel data // - modify pixel data // - upload video frame to OpenGL tex // // or: // - convert back to generic software frame // - filter/render like with software decode
  14. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); CVPixelBufferRef img = (CVPixelBufferRef)frame->planes[3]; int planes_nb = CVPixelBufferGetPlaneCount(img); CVPixelBufferLockBaseAddress(img, 0); for (int i = 0; i < planes_nb; i++) { size_t height = CVPixelBufferGetHeightOfPlane(img,i); size_t rowsize = CVPixelBufferGetBytesPerRowOfPlane(img,i); uint8_t *rowdata = CVPixelBufferGetBaseAddressOfPlane(img,i); // modify rowdata } CVPixelBufferUnlockBaseAddress(img, 0); HARDWARE FRAMES: VIDEOTOOLBOX { read/write access to underlying memory
  15. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); CVPixelBufferRef img = (CVPixelBufferRef)frame->planes[3]; int planes_nb = CVPixelBufferGetPlaneCount(img); for (int i = 0; i < planes_nb; i++) { CVOpenGLESTextureCacheCreateTextureFromImage( ... ); GLuint tex = CVOpenGLESTextureGetName(plane); // pass to GL shader for rendering } HARDWARE FRAMES: VIDEOTOOLBOX { transfer each plane to an OpenGL texture
  16. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_VIDEOTOOLBOX); AVFrame *swframe = av_frame_alloc(); av_hwframe_transfer_data(swframe, frame, 0); assert(swframe->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: VIDEOTOOLBOX { convert back to a regular software frame
  17. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); JavaVM *vm = ...; // via JNI_OnLoad() etc av_jni_set_java_vm(vm, NULL); AVCodec *codec = avcodec_find_decoder_by_name("h264_mediacodec"); AVCodecContext *avctx = avcodec_alloc_context3(codec); avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: MEDIACODEC
  18. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); JavaVM *vm = ...; // via JNI_OnLoad() etc av_jni_set_java_vm(vm, NULL); AVCodec *codec = avcodec_find_decoder_by_name("h264_mediacodec"); AVCodecContext *avctx = avcodec_alloc_context3(codec); avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: MEDIACODEC { allow FFmpeg to access Android Java APIs
  19. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); JavaVM *vm = ...; // via JNI_OnLoad() etc av_jni_set_java_vm(vm, NULL); AVCodec *codec = avcodec_find_decoder_by_name("h264_mediacodec"); AVCodecContext *avctx = avcodec_alloc_context3(codec); avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: MEDIACODEC { implemented as a separate decoder
  20. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_NV12);
  21. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_NV12); { generic pixel format { generic pixel format (decoded frame is copied back)
  22. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); JavaVM *vm = ...; // via JNI_OnLoad() etc av_jni_set_java_vm(vm, NULL); AVCodec *codec = avcodec_find_decoder_by_name("h264_mediacodec"); AVCodecContext *avctx = avcodec_alloc_context3(codec); jobject surface = ...; // android.view.Surface AVBufferRef *device_ref = av_hwdevice_ctx_alloc( AV_HWDEVICE_TYPE_MEDIACODEC); AVHWDeviceContext *ctx = (void *)device_ref->data; AVMediaCodecDeviceContext *hwctx = ctx->hwctx; hwctx->surface = (void *)(intptr_t)surface; av_hwdevice_ctx_init(device_ref); avctx->hw_device_ctx = device_ref; avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: MEDIACODEC SURFACE
  23. SOFTWARE DECODING AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avcodec_open2(avctx, codec, NULL); JavaVM *vm = ...; // via JNI_OnLoad() etc av_jni_set_java_vm(vm, NULL); AVCodec *codec = avcodec_find_decoder_by_name("h264_mediacodec"); AVCodecContext *avctx = avcodec_alloc_context3(codec); jobject surface = ...; // android.view.Surface AVBufferRef *device_ref = av_hwdevice_ctx_alloc( AV_HWDEVICE_TYPE_MEDIACODEC); AVHWDeviceContext *ctx = (void *)device_ref->data; AVMediaCodecDeviceContext *hwctx = ctx->hwctx; hwctx->surface = (void *)(intptr_t)surface; av_hwdevice_ctx_init(device_ref); avctx->hw_device_ctx = device_ref; avcodec_open2(avctx, codec, NULL); HARDWARE DECODING: MEDIACODEC SURFACE { create HWDEVICE context to pass in Surface
  24. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC SURFACE AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_MEDIACODEC);
  25. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC SURFACE AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_MEDIACODEC); { platform specific pixel format { generic pixel format
  26. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC SURFACE AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_MEDIACODEC); AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)frame->planes[3]; { unwrap to access MediaCodec Output Buffer
  27. SOFTWARE FRAMES AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx,

    frame); assert(frame->imgfmt == AV_PIX_FMT_YUV420P); HARDWARE FRAMES: MEDIACODEC SURFACE AVFrame *frame = av_frame_alloc(); int ret = avcodec_receive_frame(avctx, frame); assert(frame->imgfmt == AV_PIX_FMT_MEDIACODEC); AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)frame->planes[3]; // drop frame av_mediacodec_release_buffer(buffer, 0); // render to surface av_mediacodec_release_buffer(buffer, 1); // render at clock time av_mediacodec_render_buffer_at_time(buffer, nanotime); { render Output Buffer to screen
  28. STATE OF THE FFmpeg — iOS: ☑ videotoolbox encoder h264, hevc

    ☑ videotoolbox hwaccel decoder h264, hevc ⬜ videotoolbox decoder (async) ☑ audiotoolbox encoder aac, alac, ilbc, pcm ☑ audiotoolbox decoder aac, ac3, eac3, mp3, … — Android: ⬜ mediacodec video encoder ☑ mediacodec video decoder mpeg2, h264, hevc ⬜ mediacodec audio encoder ⬜ mediacodec audio decoder ⬜ replace JNI with NDK (API 21+) ⬜ use async NDK decode (API 27+)