WebRTC VideoEngine超详细教程（三）——集成X264编码和ffmpeg解码

WebRTC VideoEngine超详细教程（三）——集成X264编码和ffmpeg解码
转自：http://blog.csdn.net/nonmarking/article/details/47958395

本系列目前共三篇文章，后续还会更新

WebRTC VideoEngine超详细教程（一）——视频通话的基本流程

WebRTC VideoEngine超详细教程（二）——集成OPENH264编解码器

WebRTC VideoEngine超详细教程（三）——集成X264编码和ffmpeg解码

总述

在前一篇文章中，讲解了如何将OPENH264编解码器集成到WebRTC中，但是OPENH264只能编码baseline的H264视频，而且就编码质量而言，还是X264最好，本文就来讲解一下如何将X264编码器集成到WebRTC中，为了实现解码，同时要用到ffmpeg。总体流程和之前一样，分为重新封装编解码器和注册调用两大步骤，注册调用这一步没有任何不同，主要是重新封装这一步骤有较大区别。

重新封装X264编码功能

首先当然还是要下载X264源码并编译出相应的库以供调用。在Windows下使用MinGW进行编译，再使用pexports工具导出库，最后得到libx264.dll和libx264.lib，连同x264.h和x264_config.h总共四个文件一起放到工程目录下，并在项目属性中进行相应配置。

使用x264进行视频编码的基本流程如下
[cpp] view plain copy

#include <stdint.h>

#include <stdio.h>

#include <x264.h>



/*
 * Minimal x264 encoding loop: reads raw I420 frames from stdin, encodes them
 * with the "medium" preset / "high" profile and writes the resulting H.264
 * byte stream to stdout.
 *
 * Usage: example <width>x<height> < input.yuv > output.h264
 *
 * Returns 0 on success and -1 on any failure (bad arguments, x264 error or a
 * failed write).
 */
int main( int argc, char **argv )
{
    /* All locals are declared up front so that no `goto` jumps over an
     * initialized declaration (which would be ill-formed in C++). */
    int width, height;
    x264_param_t param;
    x264_picture_t pic;
    x264_picture_t pic_out;
    x264_t *h;
    int i_frame = 0;
    int i_frame_size;
    x264_nal_t *nal;
    int i_nal;
    size_t luma_size;
    size_t chroma_size;

    /* The frame size has to come from the caller; raw YUV carries no header. */
    if( argc < 2 || sscanf( argv[1], "%dx%d", &width, &height ) != 2 ||
        width <= 0 || height <= 0 )
    {
        fprintf( stderr, "Example usage: example 352x288 <input.yuv >output.h264\n" );
        goto fail;
    }

    /* Get default params for preset/tuning */
    if( x264_param_default_preset( &param, "medium", NULL ) < 0 )
        goto fail;

    /* Configure non-default params */
    param.i_csp = X264_CSP_I420;
    param.i_width  = width;
    param.i_height = height;
    param.b_vfr_input = 0;
    param.b_repeat_headers = 1;
    param.b_annexb = 1;

    /* Apply profile restrictions. */
    if( x264_param_apply_profile( &param, "high" ) < 0 )
        goto fail;

    if( x264_picture_alloc( &pic, param.i_csp, param.i_width, param.i_height ) < 0 )
        goto fail;

    h = x264_encoder_open( &param );
    if( !h )
        goto fail2;

    luma_size = (size_t)width * height;
    chroma_size = luma_size / 4;

    /* Encode frames */
    for( ;; i_frame++ )
    {
        /* Read input frame; a short read means end of input. */
        if( fread( pic.img.plane[0], 1, luma_size, stdin ) != luma_size )
            break;
        if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != chroma_size )
            break;
        if( fread( pic.img.plane[2], 1, chroma_size, stdin ) != chroma_size )
            break;

        pic.i_pts = i_frame;
        i_frame_size = x264_encoder_encode( h, &nal, &i_nal, &pic, &pic_out );
        if( i_frame_size < 0 )
            goto fail3;
        else if( i_frame_size )
        {
            /* The NAL payloads of one frame are contiguous in memory, so the
             * whole frame can be written starting at the first NAL. */
            if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
                goto fail3;
        }
    }

    /* Flush delayed frames */
    while( x264_encoder_delayed_frames( h ) )
    {
        i_frame_size = x264_encoder_encode( h, &nal, &i_nal, NULL, &pic_out );
        if( i_frame_size < 0 )
            goto fail3;
        else if( i_frame_size )
        {
            if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
                goto fail3;
        }
    }

    x264_encoder_close( h );
    x264_picture_clean( &pic );
    return 0;

    /* Error exits release exactly what has been acquired so far. */
fail3:
    x264_encoder_close( h );
fail2:
    x264_picture_clean( &pic );
fail:
    return -1;
}
还是一样，照葫芦画瓢，改写上一篇文章中提到的H264EncoderImpl类
首先是类的定义，去掉了原来的私有成员变量ISVCEncoder* encoder_，加入了以下几项，其他内容不变
[cpp] view plain copy

// Input picture passed to x264_encoder_encode(); allocated in InitEncode and
// pointed at the incoming frame's planes in Encode.
x264_picture_t pic;

// Output picture description filled in by x264_encoder_encode().
x264_picture_t pic_out;

// Handle returned by x264_encoder_open(); NULL when no encoder is open.
x264_t *encoder_;

int i_frame = 0;// frame index; used as the pts (pic.i_pts) of each submitted picture

// Array of NAL units produced by the most recent x264_encoder_encode() call.
x264_nal_t *nal;
相应的，构造函数和析构函数也要改变，这里就不赘述了，重点看InitEncode方法和Encode方法。
InitEncode方法的实现改写如下
[cpp] view plain copy

// Initializes the x264 encoder for the stream described by |inst|.
//
// Parameters:
//   inst             - codec settings; must be non-NULL with maxFramerate >= 1,
//                      width/height >= 1, and startBitrate <= maxBitrate when
//                      maxBitrate is non-zero.
//   number_of_cores  - must be >= 1; only validated here.
//   max_payload_size - not used by this implementation.
//
// Returns WEBRTC_VIDEO_CODEC_OK on success, WEBRTC_VIDEO_CODEC_ERR_PARAMETER
// for invalid arguments, a negative value propagated from Release(), or
// WEBRTC_VIDEO_CODEC_ERROR when any x264 call fails.
int H264EncoderImpl::InitEncode(const VideoCodec* inst,
        int number_of_cores,
        size_t max_payload_size) {
        if (inst == NULL) {
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }
        if (inst->maxFramerate < 1) {
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }
        // allow zero to represent an unspecified maxBitRate
        if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }
        if (inst->width < 1 || inst->height < 1) {
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }
        if (number_of_cores < 1) {
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }

        int ret_val = Release();
        if (ret_val < 0) {
            return ret_val;
        }

        /* Get default params for preset/tuning */
        x264_param_t param;
        ret_val = x264_param_default_preset(&param, "medium", NULL);
        if (ret_val != 0) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
                ret_val);
            // Only close a handle that actually exists; x264_encoder_close()
            // must not be given NULL.
            if (encoder_ != NULL) {
                x264_encoder_close(encoder_);
                encoder_ = NULL;
            }
            return WEBRTC_VIDEO_CODEC_ERROR;
        }

        /* Configure non-default params */
        param.i_csp = X264_CSP_I420;
        param.i_width = inst->width;
        param.i_height = inst->height;
        param.b_vfr_input = 0;
        param.b_repeat_headers = 1;
        // With b_annexb == 0 x264 puts a 4-byte size in front of every NAL
        // instead of a start code, so Encode() can always skip exactly 4 bytes
        // (Annex-B output would mix 3- and 4-byte start codes).
        param.b_annexb = 0;
        // Frame rate is num/den. Read the values from |inst|: codec_ is only
        // updated at the end of this function and still holds the previous
        // configuration here.
        param.i_fps_num = inst->maxFramerate;
        param.i_fps_den = 1;
        // NOTE(review): x264 only honours rc.i_bitrate when rc.i_rc_method is
        // X264_RC_ABR; the preset default is CRF - confirm the intended mode.
        param.rc.i_bitrate = inst->maxBitrate;

        /* Apply profile restrictions. */
        ret_val = x264_param_apply_profile(&param, "high");
        if (ret_val != 0) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
                ret_val);
            if (encoder_ != NULL) {
                x264_encoder_close(encoder_);
                encoder_ = NULL;
            }
            return WEBRTC_VIDEO_CODEC_ERROR;
        }

        ret_val = x264_picture_alloc(&pic, param.i_csp, param.i_width, param.i_height);
        if (ret_val != 0) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
                ret_val);
            if (encoder_ != NULL) {
                x264_encoder_close(encoder_);
                encoder_ = NULL;
            }
            return WEBRTC_VIDEO_CODEC_ERROR;
        }

        encoder_ = x264_encoder_open(&param);
        if (!encoder_) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
                ret_val);
            // There is no encoder to close here; only the picture allocated
            // above has to be released.
            x264_picture_clean(&pic);
            return WEBRTC_VIDEO_CODEC_ERROR;
        }

        if (&codec_ != inst) {
            codec_ = *inst;
        }

        // (Re)allocate the output buffer; one raw I420 frame is used as the
        // upper bound for an encoded frame.
        if (encoded_image_._buffer != NULL) {
            delete[] encoded_image_._buffer;
        }
        encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height);
        encoded_image_._buffer = new uint8_t[encoded_image_._size];
        encoded_image_._completeFrame = true;

        inited_ = true;
        WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
            "H264EncoderImpl::InitEncode(%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)",
            inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate);

        return WEBRTC_VIDEO_CODEC_OK;
    }
Encode方法的实现改写如下
[cpp] view plain copy

int H264EncoderImpl::Encode(const I420VideoFrame& input_image,

        const CodecSpecificInfo* codec_specific_info,

        const std::vector<VideoFrameType>* frame_types) {

        if (!inited_) {

            return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

        }

        if (input_image.IsZeroSize()) {

            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

        }

        if (encoded_complete_callback_ == NULL) {

            return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

        }



        VideoFrameType frame_type = kDeltaFrame;

        // We only support one stream at the moment.

        if (frame_types && frame_types->size() > 0) {

            frame_type = (*frame_types)[0];

        }



        bool send_keyframe = (frame_type == kKeyFrame);

        if (send_keyframe) {

            pic.b_keyframe = TRUE;

            WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,

                "H264EncoderImpl::EncodeKeyFrame(%d, height:%d)",

                input_image.width(), input_image.height());

        }



        // Check for change in frame size.

        if (input_image.width() != codec_.width ||

            input_image.height() != codec_.height) {

            int ret = UpdateCodecFrameSize(input_image);

            if (ret < 0) {

                return ret;

            }

        }



        /* Read input frame */

        pic.img.plane[0] = const_cast<uint8_t*>(input_image.buffer(kYPlane));

        pic.img.plane[1] = const_cast<uint8_t*>(input_image.buffer(kUPlane));

        pic.img.plane[2] = const_cast<uint8_t*>(input_image.buffer(kVPlane));

        pic.i_pts = i_frame;



        int i_nal = 0;

        int i_frame_size = x264_encoder_encode(encoder_, &nal, &i_nal, &pic, &pic_out);

        if (i_frame_size < 0)

        {

            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,

                "H264EncoderImpl::Encode() fails to encode %d",

                i_frame_size);

            x264_encoder_close(encoder_);

            x264_picture_clean(&pic);

            encoder_ = NULL;

            return WEBRTC_VIDEO_CODEC_ERROR;

        }



        RTPFragmentationHeader frag_info;



        if (i_frame_size)

        {

            if (i_nal == 0) {

                return WEBRTC_VIDEO_CODEC_OK;

            }

            frag_info.VerifyAndAllocateFragmentationHeader(i_nal);



            encoded_image_._length = 0;



            uint32_t totalNaluIndex = 0;

            for (int nal_index = 0; nal_index < i_nal; nal_index++)

            {

                uint32_t currentNaluSize = 0;

                currentNaluSize = nal[nal_index].i_payload - 4; //x264_encoder_encode编码得到的nal单元是已经带有起始码的，此外，这里直接使用nal[index]即可，不必再使用x264_nal_encode函数

                memcpy(encoded_image_._buffer + encoded_image_._length, nal[nal_index].p_payload + 4, currentNaluSize);//encoded_image_中存有的是去掉起始码的数据

                encoded_image_._length += currentNaluSize;



                WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,

                    "H264EncoderImpl::Encode() nal_type %d, length:%d",

                    nal[nal_index].i_type, encoded_image_._length);



                frag_info.fragmentationOffset[totalNaluIndex] = encoded_image_._length - currentNaluSize;

                frag_info.fragmentationLength[totalNaluIndex] = currentNaluSize;

                frag_info.fragmentationPlType[totalNaluIndex] = nal[nal_index].i_type;

                frag_info.fragmentationTimeDiff[totalNaluIndex] = 0;

                totalNaluIndex++;

            }

        }

        i_frame++;

        if (encoded_image_._length > 0) {

            encoded_image_._timeStamp = input_image.timestamp();

            encoded_image_.capture_time_ms_ = input_image.render_time_ms();

            encoded_image_._encodedHeight = codec_.height;

            encoded_image_._encodedWidth = codec_.width;

            encoded_image_._frameType = frame_type;

            // call back

            encoded_complete_callback_->Encoded(encoded_image_, NULL, &frag_info);

        }

        return WEBRTC_VIDEO_CODEC_OK;

    }
其他方法的实现均没有改变。
至此，X264编码器重新封装完毕，还是比较好理解的。

重新封装ffmpeg解码功能

首先还是一样，获得ffmpeg的头文件和库文件，加入工程中并进行相应设置，这里只需使用avcodec avformat avutil swscale四个库，头文件也可以做相应的删减。

ffmpeg解码的基本流程如下，实际集成之后是从WebRTC的EncodedImage& input_image中获得待解码数据的，所以不能使用常见的基于文件的解码流程
[cpp] view plain copy

// Basic in-memory H.264 decoding flow with FFmpeg: the compressed data comes
// from a caller-supplied buffer rather than from a file or demuxer.
AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
AVCodecContext *codecCtx = avcodec_alloc_context3(codec);
avcodec_open2(codecCtx, codec, NULL);

// Placeholders: point videoData at the encoded data and set len to its size
// in bytes before the packet is built.
char *videoData;
int len;

AVFrame *frame = av_frame_alloc();
AVPacket packet;
av_new_packet(&packet, len);
memcpy(packet.data, videoData, len);

int ret, got_picture;
ret = avcodec_decode_video2(codecCtx, frame, &got_picture, &packet);
if (ret > 0){
    if(got_picture){
    // continue with the next processing step on the decoded frame
    }
}

// Release the per-packet resources once the frame has been consumed.
av_free_packet(&packet);
av_frame_free(&frame);
相应的，对H264DecoderImpl类的定义和各方法的实现要进行改写。
首先是类的定义，去掉了ISVCDecoder* decoder_，加入了以下私有成员变量
[cpp] view plain copy

// Decoder context allocated and opened in InitDecode.
AVCodecContext  *pCodecCtx;

  // H.264 decoder looked up with avcodec_find_decoder() in InitDecode.
  AVCodec           *pCodec;

  // pFrame receives the decoder output; pFrameYUV holds the YUV420P copy
  // produced by sws_scale() in Decode.
  AVFrame   *pFrame, *pFrameYUV;

  // Packet carrying the compressed data handed to the decoder in Decode.
  AVPacket *packet;

  // Scaler/converter context used to convert pFrame into pFrameYUV.
  struct SwsContext *img_convert_ctx;

  uint8_t *decode_buffer;// stores the first received SPS, PPS and IDR frame so that the initial decode can be performed

  // Pixel storage backing pFrameYUV.
  uint8_t *out_buffer;

  // Number of Decode calls counted during start-up; Decode buffers its input
  // while this is below 2.
  int framecnt = 0;

  // Number of bytes currently stored in decode_buffer.
  int encoded_length = 0;
构造函数和析构函数的改写省略不表，重点看一下InitDecode方法和Decode方法

InitDecode方法改写如下
[cpp] view plain copy

int H264DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {

        if (inst == NULL) {

            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

        }

        int ret_val = Release();

        if (ret_val < 0) {

            return ret_val;

        }



        if (&codec_ != inst) {

            // Save VideoCodec instance for later; mainly for duplicating the decoder.

            codec_ = *inst;

        }

        pCodec = avcodec_find_decoder(AV_CODEC_ID_H264);

        pCodecCtx = avcodec_alloc_context3(pCodec);

        pCodecCtx->pix_fmt = PIX_FMT_YUV420P;

        pCodecCtx->width = codec_.width;

        pCodecCtx->height = codec_.height;

        //pCodecCtx->bit_rate = codec_.targetBitrate*1000;

        pCodecCtx->time_base.num = 1;

        pCodecCtx->time_base.den = codec_.maxFramerate;



        if (pCodec == NULL){

            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,

                "H264DecoderImpl::InitDecode, Codec not found.");

            return WEBRTC_VIDEO_CODEC_ERROR;

        }

        if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){

            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,

                "H264DecoderImpl::InitDecode, Could not open codec.");

            return WEBRTC_VIDEO_CODEC_ERROR;

        }

        inited_ = true;



        // Always start with a complete key frame.

        key_frame_required_ = true;

        WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,

            "H264DecoderImpl::InitDecode(%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)",

            inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate);

        return WEBRTC_VIDEO_CODEC_OK;

    }

Decode方法的实现改写如下
[cpp] view plain copy

// Decodes one encoded image with FFmpeg and delivers the resulting I420 frame
// to the registered decode-complete callback.
//
// The first two calls only append their input to decode_buffer. The third
// call decodes that buffered data, and every later call decodes its own input.
//
// Parameters:
//   input_image         - encoded data; _buffer must be non-NULL.
//   missing_frames      - not used by this implementation.
//   fragmentation       - not used by this implementation.
//   codec_specific_info - must be non-NULL with codecType kVideoCodecH264.
//
// Returns WEBRTC_VIDEO_CODEC_OK when the input was buffered or decoded
// (whether or not a picture came out), WEBRTC_VIDEO_CODEC_UNINITIALIZED when
// the decoder or callback is not set up, WEBRTC_VIDEO_CODEC_ERR_PARAMETER for
// a NULL input buffer, and WEBRTC_VIDEO_CODEC_ERROR for missing/mismatching
// codec info, allocation failures or a decoder error.
int H264DecoderImpl::Decode(const EncodedImage& input_image,
        bool missing_frames,
        const RTPFragmentationHeader* fragmentation,
        const CodecSpecificInfo* codec_specific_info,
        int64_t /*render_time_ms*/) {
        if (!inited_) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, decoder is not initialized");
            return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
        }

        if (decode_complete_callback_ == NULL) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, decode complete call back is not set");
            return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
        }

        if (input_image._buffer == NULL) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, null buffer");
            return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
        }
        if (!codec_specific_info) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, no codec info");
            return WEBRTC_VIDEO_CODEC_ERROR;
        }
        if (codec_specific_info->codecType != kVideoCodecH264) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, non h264 codec %d", codec_specific_info->codecType);
            return WEBRTC_VIDEO_CODEC_ERROR;
        }

        WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
            "H264DecoderImpl::Decode(frame_type:%d, length:%d",
            input_image._frameType, input_image._length);

        if (framecnt < 2)
        {
            // Collect the initial SPS/PPS and IDR data for the first decode.
            // NOTE(review): decode_buffer's capacity is not visible in this
            // function - confirm it can hold the first two images.
            memcpy(decode_buffer + encoded_length, input_image._buffer, input_image._length);
            encoded_length += input_image._length;
            framecnt++;
            return WEBRTC_VIDEO_CODEC_OK;
        }

        // Every resource allocated below is released before returning, on all
        // paths, and the members are reset so nothing dangles.
        int result = WEBRTC_VIDEO_CODEC_OK;
        pFrameYUV = NULL;
        out_buffer = NULL;
        img_convert_ctx = NULL;
        pFrame = av_frame_alloc();
        packet = static_cast<AVPacket*>(av_malloc(sizeof(AVPacket)));
        bool packet_ready = false;

        if (pFrame == NULL || packet == NULL) {
            WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                "H264DecoderImpl::Decode, out of memory");
            result = WEBRTC_VIDEO_CODEC_ERROR;
        } else {
            // NOTE(review): on the first decode only the buffered data is
            // submitted; the current input_image is not - confirm this is
            // intended.
            const bool first_decode = (framecnt == 2);
            const uint8_t* src = first_decode ? decode_buffer : input_image._buffer;
            const int src_len = first_decode ? encoded_length
                                             : static_cast<int>(input_image._length);
            if (av_new_packet(packet, src_len) < 0) {
                WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                    "H264DecoderImpl::Decode, out of memory");
                result = WEBRTC_VIDEO_CODEC_ERROR;
            } else {
                packet_ready = true;
                memcpy(packet->data, src, src_len);
                if (first_decode) {
                    // The start-up buffer is no longer needed.
                    av_free(decode_buffer);
                    decode_buffer = NULL;
                    framecnt++;
                    printf(" Loading");
                }
            }
        }

        if (result == WEBRTC_VIDEO_CODEC_OK) {
            int got_picture = 0;
            int ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
            if (ret < 0){
                WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                    "H264DecoderImpl::Decode, Decode Error.");
                result = WEBRTC_VIDEO_CODEC_ERROR;
            }
            else if (got_picture){
                // Set up the conversion only after decoding, when pCodecCtx
                // carries the dimensions and pixel format of the stream.
                pFrameYUV = av_frame_alloc();
                out_buffer = static_cast<uint8_t*>(av_malloc(
                    avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height)));
                img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
                    pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);

                if (pFrameYUV == NULL || out_buffer == NULL || img_convert_ctx == NULL) {
                    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
                        "H264DecoderImpl::Decode, out of memory");
                    result = WEBRTC_VIDEO_CODEC_ERROR;
                } else {
                    avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P,
                        pCodecCtx->width, pCodecCtx->height);
                    sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0,
                        pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);

                    // Chroma planes have ceil(height / 2) rows in YUV420P.
                    const int chroma_height = (pCodecCtx->height + 1) / 2;
                    const int size_y = pFrameYUV->linesize[0] * pCodecCtx->height;
                    const int size_u = pFrameYUV->linesize[1] * chroma_height;
                    const int size_v = pFrameYUV->linesize[2] * chroma_height;

                    decoded_image_.CreateFrame(size_y, static_cast<uint8_t*>(pFrameYUV->data[0]),
                        size_u, static_cast<uint8_t*>(pFrameYUV->data[1]),
                        size_v, static_cast<uint8_t*>(pFrameYUV->data[2]),
                        pCodecCtx->width,
                        pCodecCtx->height,
                        pFrameYUV->linesize[0],
                        pFrameYUV->linesize[1],
                        pFrameYUV->linesize[2]);

                    decoded_image_.set_timestamp(input_image._timeStamp);
                    decode_complete_callback_->Decoded(decoded_image_);
                }
            }
            else
                printf(".");
        }

        // Release everything allocated for this call.
        // NOTE(review): assumes decoded_image_.CreateFrame() copies the plane
        // data - confirm against the I420VideoFrame implementation in use.
        if (img_convert_ctx != NULL) {
            sws_freeContext(img_convert_ctx);
            img_convert_ctx = NULL;
        }
        av_free(out_buffer);
        out_buffer = NULL;
        if (pFrameYUV != NULL) {
            av_frame_free(&pFrameYUV);
        }
        if (packet != NULL) {
            if (packet_ready) {
                av_free_packet(packet);
            }
            av_free(packet);
            packet = NULL;
        }
        if (pFrame != NULL) {
            av_frame_free(&pFrame);
        }
        return result;
    }

其他方法的实现保持不变，至此ffmpeg解码功能的重新封装也完成了。
从最后实现的效果来看，X264的视频质量的确是最好的，但是播放端的解码延时比较高，暂时还不清楚原因，希望了解的朋友指教。
本项目源代码
相关阅读:
python学习笔记——拾
 python学习笔记——玖
 Python 实现栈与队列
 Vijos1774 机器翻译 [模拟]
Vijos1788 第K大 [模拟]
Python 序列求和
 HDU 2102 A计划 DFS与BFS两种写法 [搜索]
Python 多组输入
 Python 文件读写
 HDU 2068 RPG错排 [错排公式]
原文地址：https://www.cnblogs.com/x_wukong/p/4880961.html

WebRTC VideoEngine超详细教程（三）——集成X264编码和ffmpeg解码

总述

重新封装X264编码功能

重新封装ffmpeg解码功能