Basic Concepts
The video files we see every day come in many formats, such as avi, mkv, rmvb, mov and mp4. These are called containers (Container): each container format defines how the audio and video data inside it (and any other data, such as subtitles) are organized. A container normally encapsulates a video track and an audio track, also called the video stream and the audio stream.
The first step in playing a video file is to demux it according to its container format, extracting the encapsulated video stream, audio stream and subtitles (if any). The demuxed data is read into packets (packet); each packet holds the compressed data of a video frame (frame) or of one or more audio frames. The video and audio packets are then handed to the matching decoder (decoder): H.264-encoded video goes to the H.264 decoder, MP3-encoded audio goes to the MP3 decoder. Decoding yields the raw picture data (YUV or RGB) and raw sound data (PCM). Finally, at the synchronized presentation times, the pictures are drawn to the screen and the sound is sent to the sound card, which is the video we end up watching.
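In FFmpeg terms each of these concepts maps onto a struct; the short reference sketch below (not part of the original text) simply lists the types that the code later in this article works with:

AVFormatContext *fmt_ctx;  // the opened container (mp4 / mkv / ts ...), handled by the demuxer
AVStream        *stream;   // one video / audio / subtitle stream inside the container
AVPacket         packet;   // one compressed packet read from a stream (demuxed, not yet decoded)
AVCodec         *decoder;  // the decoder matching the stream's codec (H.264, MP3, ...)
AVFrame         *frame;    // one decoded, raw frame (YUV/RGB picture or PCM samples)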
Decoding Flow
10 OPEN video_stream FROM video.avi
20 READ packet FROM video_stream INTO frame
30 IF frame NOT COMPLETE GOTO 20
40 DO SOMETHING WITH frame
50 GOTO 20
Source Code
/*
 * Video frame capture: decode a video file and save the first frames as images
 */
#include <stdio.h>
extern "C"
{
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
static void SaveFrame(AVFrame *pFrame, int width, int height, int iFrame);
int main(int argc, char *argv[])
{
AVFormatContext *pFormatCtx = NULL;
AVCodecContext *pCodecCtx = NULL;
AVCodec *pCodec = NULL;
AVPacket packet;
AVFrame *pFrame = NULL, *pFrameRGB = NULL;
unsigned char *buffer = NULL;
struct SwsContext *img_convert_ctx = NULL;
int i, VideoStream;
int FrameFinished;
char filename[32] = "Titanic.ts";
/**
* Register all container formats and codecs
* Initialize libavformat and register all the muxers, demuxers and
* protocols. If you do not call this function, then you can select
* exactly which formats you want to support.
*
* @see av_register_input_format()
* @see av_register_output_format()
*/
av_register_all();
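/*
 * Note (not in the original code): since FFmpeg 4.0 av_register_all() is deprecated;
 * all muxers, demuxers and codecs are registered automatically, so this call can
 * simply be dropped when building against a newer libavformat.
 */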
/**
 * pFormatCtx must be initialized before use, either:
 * 1. pFormatCtx = NULL;
 * or:
 * 2. pFormatCtx = avformat_alloc_context();
 */
/**
* Open the video file and read the header information (the codecs are not opened yet)
* Function declaration: int avformat_open_input(AVFormatContext **ps, const char *url, AVInputFormat *fmt, AVDictionary **options);
* Open an input stream and read the header. The codecs are not opened.
* The stream must be closed with avformat_close_input().
*
* @param ps Pointer to user-supplied AVFormatContext (allocated by avformat_alloc_context).
* May be a pointer to NULL, in which case an AVFormatContext is allocated by this
* function and written into ps.
* Note that a user-supplied AVFormatContext will be freed on failure.
* @param url URL of the stream to open.
* @param fmt If non-NULL, this parameter forces a specific input format.
* Otherwise the format is autodetected.
* @param options A dictionary filled with AVFormatContext and demuxer-private options.
* On return this parameter will be destroyed and replaced with a dict containing
* options that were not found. May be NULL.
*
* @return 0 on success, a negative AVERROR on failure.
*
* @note If you want to use custom IO, preallocate the format context and set its pb field.
*/
if (avformat_open_input(&pFormatCtx, filename, NULL, NULL) != 0)
{
printf("Couldn't open an input stream.
");
return -1;
}
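/*
 * A minimal sketch (not part of the original code): libav* functions return a
 * negative AVERROR code on failure, which can be turned into a readable message
 * with av_strerror(), assuming the return value has been stored in a variable ret:
 *
 *     char errbuf[AV_ERROR_MAX_STRING_SIZE] = {0};
 *     av_strerror(ret, errbuf, sizeof(errbuf));
 *     printf("avformat_open_input failed: %s\n", errbuf);
 */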
/**
* Get stream information
* Function declaration: int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
* Read packets of a media file to get stream information. This
* is useful for file formats with no headers such as MPEG. This
* function also computes the real framerate in case of MPEG-2 repeat
* frame mode.
* The logical file position is not changed by this function;
* examined packets may be buffered for later processing.
*
* @param ic media file handle
* @param options If non-NULL, an ic.nb_streams long array of pointers to
* dictionaries, where i-th member contains options for
* codec corresponding to i-th stream.
* On return each dictionary will be filled with options that were not found.
* @return >=0 if OK, AVERROR_xxx on error
*
* @note this function isn't guaranteed to open all the codecs, so
* options being non-empty at return is a perfectly normal behavior.
*
* @todo Let the user decide somehow what information is needed so that
* we do not waste time getting stuff the user does not need.
*/
if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
{
printf("Couldn't find stream information.
");
return -1;
}
/**
* Print information about the input video file
* Function declaration: void av_dump_format(AVFormatContext *ic, int index, const char *url, int is_output);
* Print detailed information about the input or output format, such as
* duration, bitrate, streams, container, programs, metadata, side data,
* codec and time base.
*
* @param ic the context to analyze
* @param index index of the stream to dump information about
* (the author passes -1 here, letting FFmpeg choose on its own)
* @param url the URL to print, such as source or destination file
* @param is_output Select whether the specified context is an input(0) or output(1)
**/
av_dump_format(pFormatCtx, -1, filename, 0);
//Find the first video stream
VideoStream = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
VideoStream = i;
break;
}
}
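/*
 * Alternative sketch (assumption, not in the original): libavformat can pick the
 * "best" video stream for us, which avoids the manual loop above:
 *
 *     VideoStream = av_find_best_stream(pFormatCtx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
 *     // returns the stream index, or a negative AVERROR code if none was found
 */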
if (VideoStream == -1)
{
printf("Couldn't find a video stream.
");
return -1;
}
pCodecCtx = pFormatCtx->streams[VideoStream]->codec;
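/*
 * Note: accessing AVStream->codec directly is deprecated in newer FFmpeg releases.
 * A sketch of the replacement sequence, assuming libavcodec from FFmpeg 3.1 or later:
 *
 *     AVCodecParameters *par = pFormatCtx->streams[VideoStream]->codecpar;
 *     pCodec    = avcodec_find_decoder(par->codec_id);
 *     pCodecCtx = avcodec_alloc_context3(pCodec);
 *     avcodec_parameters_to_context(pCodecCtx, par);
 */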
/**
* Function declaration: AVCodec *avcodec_find_decoder(enum AVCodecID id);
* Find a registered decoder with a matching codec ID.
*
* @param id AVCodecID of the requested decoder
* @return A decoder if one was found, NULL otherwise.
*/
pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
printf("Codec not found.
");
return -1;
}
//open codec
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
{
printf("Could not open codec.
");
return -1;
}
/**
* Function declaration: AVFrame *av_frame_alloc(void);
* Allocate an AVFrame and set its fields to default values. The resulting
* struct must be freed using av_frame_free().
*
* @return An AVFrame filled with default values or NULL on failure.
*
* @note this only allocates the AVFrame itself, not the data buffers. Those
* must be allocated through other means, e.g. with av_frame_get_buffer() or
* manually.
*/
/**
* Allocate the frames:
* pFrame holds the decoded frame data
* pFrameRGB holds the converted (RGB24) data
*/
pFrame = av_frame_alloc();
pFrameRGB = av_frame_alloc();
if (pFrame == NULL || pFrameRGB == NULL)
{
printf("memory allocation error
");
return -1;
}
/**
* Function declaration: void *av_malloc(size_t size) av_malloc_attrib av_alloc_size(1);
* Allocate a block of size bytes with alignment suitable for all
* memory accesses (including vectors if available on the CPU).
* @param size Size in bytes for the memory block to be allocated.
* @return Pointer to the allocated block, NULL if the block cannot
* be allocated.
* @see av_mallocz()
*/
/**
* Function declaration: int av_image_get_buffer_size(enum AVPixelFormat pix_fmt, int width, int height, int align);
* Return the size in bytes of the amount of data required to store an
* image with the given parameters.
*
* @param[in] align the assumed linesize alignment (i.e. how many bytes each image line is padded to)
*/
//Compute the buffer size required for an RGB24 image of the same dimensions and allocate it
buffer = (unsigned char *)av_malloc(av_image_get_buffer_size(AV_PIX_FMT_RGB24, pCodecCtx->width, pCodecCtx->height, 1));
/**
* Function declaration: int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum AVPixelFormat pix_fmt, int width, int height, int align);
* Setup the data pointers and linesizes based on the specified image
* parameters and the provided array.
*
* The fields of the given image are filled in by using the src
* address which points to the image data buffer. Depending on the
* specified pixel format, one or multiple image data pointers and
* line sizes will be set. If a planar format is specified, several
* pointers will be set pointing to the different picture planes and
* the line sizes of the different planes will be stored in the
* lines_sizes array. Call with src == NULL to get the required
* size for the src buffer.
*
* To allocate the buffer and fill in the dst_data and dst_linesize in
* one call, use av_image_alloc().
*
* @param dst_data data pointers to be filled in
* @param dst_linesizes linesizes for the image in dst_data to be filled in
* @param src buffer which will contain or contains the actual image data, can be NULL
* @param pix_fmt the pixel format of the image
* @param width the width of the image in pixels
* @param height the height of the image in pixels
* @param align the value used in src for linesize alignment
* @return the size in bytes required for src, a negative error code
* in case of failure
*/
//Bind pFrameRGB's data pointers and linesizes to the memory pointed to by buffer
av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, buffer, AV_PIX_FMT_RGB24, pCodecCtx->width, pCodecCtx->height, 1);
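/*
 * Alternative sketch (assumption): the allocation and the pointer setup can also be
 * done in a single call, in which case the separate buffer above is not needed and
 * the planes are freed later with av_freep(&pFrameRGB->data[0]):
 *
 *     av_image_alloc(pFrameRGB->data, pFrameRGB->linesize,
 *                    pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_RGB24, 1);
 */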
/**
* Function declaration: struct SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
* int dstW, int dstH, enum AVPixelFormat dstFormat,
* int flags, SwsFilter *srcFilter,
* SwsFilter *dstFilter, const double *param);
*
* Allocate and return an SwsContext. You need it to perform
* scaling/conversion operations using sws_scale().
*
* @param srcW the width of the source image
* @param srcH the height of the source image
* @param srcFormat the source image format
* @param dstW the width of the destination image
* @param dstH the height of the destination image
* @param dstFormat the destination image format
* @param flags specify which algorithm and options to use for rescaling
* @param param extra parameters to tune the used scaler
* For SWS_BICUBIC param[0] and [1] tune the shape of the basis
* function, param[0] tunes f(1) and param[1] f´(1)
* For SWS_GAUSS param[0] tunes the exponent and thus cutoff
* frequency
* For SWS_LANCZOS param[0] tunes the width of the window function
* @return a pointer to an allocated context, or NULL in case of error
* @note this function is to be removed after a saner alternative is
* written
*/
//Get the image conversion (scaling) context
img_convert_ctx = sws_getContext(pCodecCtx->width,
pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width,
pCodecCtx->height, AV_PIX_FMT_RGB24, SWS_BILINEAR,
NULL, NULL, NULL);
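/*
 * Defensive check (not in the original): sws_getContext() returns NULL when the
 * requested conversion cannot be set up, so a real application should verify it:
 *
 *     if (img_convert_ctx == NULL)
 *     {
 *         printf("Cannot initialize the conversion context.\n");
 *         return -1;
 *     }
 */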
/**
* Function declaration: int av_read_frame(AVFormatContext *s, AVPacket *pkt);
* Return the next frame of a stream.
* This function returns what is stored in the file, and does not validate
* that what is there are valid frames for the decoder. It will split what is
* stored in the file into frames and return one for each call. It will not
* omit invalid data between valid frames so as to give the decoder the maximum
* information possible for decoding.
*
* If pkt->buf is NULL, then the packet is valid until the next
* av_read_frame() or until avformat_close_input(). Otherwise the packet
* is valid indefinitely. In both cases the packet must be freed with
* av_packet_unref when it is no longer needed. For video, the packet contains
* exactly one frame. For audio, it contains an integer number of frames if each
* frame has a known fixed size (e.g. PCM or ADPCM data). If the audio frames
* have a variable size (e.g. MPEG audio), then it contains one frame.
*
* pkt->pts, pkt->dts and pkt->duration are always set to correct
* values in AVStream.time_base units (and guessed if the format cannot
* provide them). pkt->pts can be AV_NOPTS_VALUE if the video format
* has B-frames, so it is better to rely on pkt->dts if you do not
* decompress the payload.
*
* @return 0 if OK, < 0 on error or end of file
*/
/**
* Read one packet from the file. For video, a packet contains exactly one frame;
* for audio it may contain several frames (when the audio frames have a fixed size).
* If the packet belongs to the video stream, decode it with avcodec_decode_video2.
* The decoder cannot always produce a picture from a single packet (for example when
* reference frames are still missing), so it reports the result through FrameFinished:
* non-zero means a complete decoded frame is available in pFrame, zero means it is not.
* Note that av_free_packet must be called after each packet has been processed.
*/
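/*
 * Note: avcodec_decode_video2() is deprecated in newer FFmpeg releases. A sketch of
 * the same loop body using the send/receive API (assuming FFmpeg 3.1 or later, not
 * what this program uses):
 *
 *     if (avcodec_send_packet(pCodecCtx, &packet) == 0)
 *     {
 *         while (avcodec_receive_frame(pCodecCtx, pFrame) == 0)
 *         {
 *             // pFrame now holds one complete decoded frame: convert / save it here
 *         }
 *     }
 */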
i = 0;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == VideoStream)//the packet belongs to the video stream
{
avcodec_decode_video2(pCodecCtx, pFrame, &FrameFinished, &packet);
if (FrameFinished != 0)//a complete decoded frame is available
{
//Convert the picture from its decoded pixel format to RGB24
sws_scale(img_convert_ctx, (const unsigned char* const*)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height, pFrameRGB->data,
pFrameRGB->linesize);
//Write the first 50 frames to .ppm image files
if (i < 50)
{
SaveFrame(pFrameRGB, pCodecCtx->width, pCodecCtx->height, i);
i++;
}
}
}
av_free_packet(&packet);
}
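/*
 * Sketch of draining (assumption, omitted in the original): decoders may buffer a few
 * frames internally, so after av_read_frame() reports end of file the remaining frames
 * can be flushed by feeding empty packets until no more frames come out:
 *
 *     packet.data = NULL;
 *     packet.size = 0;
 *     do {
 *         avcodec_decode_video2(pCodecCtx, pFrame, &FrameFinished, &packet);
 *         // if FrameFinished != 0, handle the flushed frame as above
 *     } while (FrameFinished != 0);
 */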
//Clean up
sws_freeContext(img_convert_ctx);
av_free(buffer);
av_frame_free(&pFrame);
av_frame_free(&pFrameRGB);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
static void SaveFrame(AVFrame *pFrame, int width, int height, int iFrame)
{
FILE *pFile;
char szFilename[32];
int y;
// Note: the backslash must be escaped ("\\"); the images directory must already exist
sprintf(szFilename, "images\\frame%d.ppm", iFrame);
pFile = fopen(szFilename, "wb");
if (pFile == NULL)
{
printf("pFile is null");
return;
}
// Write header
fprintf(pFile, "P6
%d %d
255
", width, height);
// Write pixel data
for (y = 0; y < height; y++)
{
fwrite(pFrame->data[0] + y * pFrame->linesize[0], 1, width * 3, pFile);
}
printf("images\frame%d.ppm
", iFrame);
// Close file
fclose(pFile);
}
For packed pixel formats (e.g. RGB24), all of the data is stored in data[0].
For planar formats (e.g. YUV420P), the planes are split across data[0], data[1], data[2], ... (for YUV420P, data[0] holds Y, data[1] holds U and data[2] holds V).
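As a small illustration (a sketch, not part of the program above), this is how the two layouts are typically addressed, assuming x and y are valid pixel coordinates and pFrame / pFrameRGB are the frames from the code above:

/* Packed RGB24: one plane, 3 bytes per pixel, each row padded to linesize[0] bytes */
unsigned char *rgb = pFrameRGB->data[0] + y * pFrameRGB->linesize[0] + x * 3;
unsigned char r = rgb[0], g = rgb[1], b = rgb[2];

/* Planar YUV420P: Y at full resolution, U and V subsampled by 2 in both directions */
unsigned char Y = pFrame->data[0][y * pFrame->linesize[0] + x];
unsigned char U = pFrame->data[1][(y / 2) * pFrame->linesize[1] + (x / 2)];
unsigned char V = pFrame->data[2][(y / 2) * pFrame->linesize[2] + (x / 2)];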
Output
Input #-1, mpegts, from 'Titanic.ts':
  Duration: 00:00:48.03, start: 1.463400, bitrate: 589 kb/s
  Program 1
    Metadata:
      service_name    : Service01
      service_provider: FFmpeg
    Stream #-1:0[0x100]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p, 640x272 [SAR 1:1 DAR 40:17], 23.98 fps, 23.98 tbr, 90k tbn
    Stream #-1:1[0x101]: Audio: mp3 ([3][0][0][0] / 0x0003), 48000 Hz, stereo, s16p, 128 kb/s
images\frame0.ppm
images\frame1.ppm
images\frame2.ppm
images\frame3.ppm
images\frame4.ppm
images\frame5.ppm
images\frame6.ppm
images\frame7.ppm
images\frame8.ppm
images\frame9.ppm
images\frame10.ppm
images\frame11.ppm
images\frame12.ppm
images\frame13.ppm
images\frame14.ppm
images\frame15.ppm
images\frame16.ppm
images\frame17.ppm
images\frame18.ppm
images\frame19.ppm
images\frame20.ppm
images\frame21.ppm
images\frame22.ppm
images\frame23.ppm
images\frame24.ppm
images\frame25.ppm
images\frame26.ppm
images\frame27.ppm
images\frame28.ppm
images\frame29.ppm
images\frame30.ppm
images\frame31.ppm
images\frame32.ppm
images\frame33.ppm
images\frame34.ppm
images\frame35.ppm
images\frame36.ppm
images\frame37.ppm
images\frame38.ppm
images\frame39.ppm
images\frame40.ppm
images\frame41.ppm
images\frame42.ppm
images\frame43.ppm
images\frame44.ppm
images\frame45.ppm
images\frame46.ppm
images\frame47.ppm
images\frame48.ppm
images\frame49.ppm
Press any key to continue . . .