• MP4音频解码信息(转帖加注释)


    http://blog.csdn.net/linzhiji/article/details/5840031

    注释:

          1。3gp和MP4中的AAC的私有数据保存在esds的0x05标签的数据,

           结构为 05 + 长度 + 内容。

          将长度赋值给 extradatasize

          将内容赋值给 extradata

          长度的计算函数在ffmpeg中的static int mp4_read_descr_len(ByteIOContext *pb)

          2。avc/h264的extradata和extradatasize信息在avcC atom中:将avcC atom去掉长度和type(共8个字节)后剩余部分的长度赋值给extradatasize,内容赋值给extradata。

    MP4文件格式分为头部和数据两部分,头部是由许多被称作Atom的结构单元嵌套或排列而成,数据部分则完全为实际数据不包含元信息,因此具体解码时音视频帧的位置和大小都要在头部获取。详细内容见以下链接:
    http://wqyuwss.52rd.net 
    这里总结下音频解码信息获取的一些经验,当然详细内容需要查看quick time file format的文档。
    MP4的音频解码信息保存在如下嵌套的Atom中:{moov{trak{mdia{minf{smhd}{stbl{stsd}}}}}}(smhd与stbl同为minf的子Atom,minf中含有smhd的trak即音频轨道)
    stsd可能包括多个音频信息的描述,结构如下:

    /* Layout of one audio sample-description entry stored inside the stsd atom. */
    typedef struct stsdtable
    {
        unsigned int size;//size of this entry (atom size)
        char format[4];//audio codec tag, four characters (see the table below)
        int res1;
        int ref;
        short version;//version of the entry
        short pad1;
        int pad2;
        short channels;//number of channels
        short bitspersample;
        short compress_id;
        short res2;
        short samplerate1;//sample rate
        short samplerate2;
        //{if(version==1)

            /* the four fields below are only present when version == 1 */
            int sampleperpacket;
            int bytesperpacket;
            int bytesperframe;
            int bytespersample;
        //}

    } stsdtable;

    其中format对应音频编码格式:
    PCM_S32BE,  in32
    PCM_S32LE,  in32
    PCM_S24BE,  in24
    PCM_S24LE,  in24
    PCM_S16BE,  twos // 16 bits //
    PCM_S16LE,  sowt //  
    PCM_S16LE,  lpcm
    PCM_F32BE,  fl32
    PCM_F64BE,  fl64
    PCM_S8,     sowt
    PCM_U8,     raw  // 8 bits unsigned 
    PCM_U8,     NONE // uncompressed 
    PCM_MULAW,  ulaw //
    PCM_ALAW,   alaw //
    ADPCM_IMA_QT, ima4 // IMA-4 ADPCM //
    MACE3,      MAC3 // Macintosh Audio Compression and Expansion 3:1 ///
    MACE6,      MAC6 // Macintosh Audio Compression and Expansion 6:1 //
    MP3,        .mp3 // MPEG layer 3; sample files at http://www.3ivx.com/showcase.html use this tag //
    MP3,        0x6D730055  // MPEG layer 3 //
    OGG_VORBIS, OggS //// sample files at http://heroinewarrior.com/xmovie.php3 use this tag //
    AAC,        mp4a // MPEG-4 AAC //
    AC3,        ac-3 // ETSI TS 102 366 Annex F //
    AMR_NB,     samr // AMR-NB 3gp //
    AMR_WB,     sawb // AMR-WB 3gp//
    GSM,        agsm
    ALAC,       alac // Apple Lossless //
    QCELP,      Qclp
    QCELP,      sqcp // ISO Media fourcc //
    QDM2,       QDM2 // QDM2 //
    DVAUDIO,    vdva
    DVAUDIO,    dvca
    WMAV2,      WMA2
    这个获取比较简单,下面是解码私有数据的获取:
    这些解码私有数据也保存在Atom中,通常在上面结构体的后面,有esds、frma、mp4a、wave。AAC的私有数据保存在esds的0x05标签的数据,QDM2的则是"wave"Atom的数据部分(以下按顺序分析):
       4字节 长度
       4字节 "esds" or "m4ds" 标志
       4字节 版本标识

       1字节 ES描述类型标签 0x03
       --3字节 扩展描述类型标签 可能没有
       1字节 描述类型长度 
       2字节 ES ID
       1字节 流优先级

       1字节 解码配置描述类型标签 0x04
       --3字节 扩展描述类型标签 可能没有
       1字节 描述类型长度 
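       1字节 对象类型ID(objectTypeIndication,例如0x40表示MPEG-4音频)
       1字节 流类型(高6位)+ upStream标志(1位)+ 保留位(1位)
       3字节 解码缓冲区大小(bufferSizeDB)
       4字节 最大码率(maxBitrate)
       4字节 平均码率(avgBitrate)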

       1字节 解码器特定信息(DecoderSpecificInfo)描述类型标签 0x05
       --3字节 扩展描述类型标签 可能没有
       1字节 长度 
       N字节 内容(即解码私有数据,N由上面的长度给出)

       1字节 0x06
       0x06不再分析
    下面是一个例子:
    长度 标签 
    00015218h: 00 00 00 10 73 6D 68 64 00 00 00 00 00 00 00 00 ; ....smhd........
    00015228h: 00 00 00 24 64 69 6E 66 00 00 00 1C 64 72 65 66 ; ...$dinf....dref
    00015238h: 00 00 00 00 00 00 00 01 00 00 00 0C 75 72 6C 20 ; ............url 
    00015248h: 00 00 00 01 00 02 C0 97 73 74 62 6C 00 00 00 5B ; ......罈stbl...[
    00015258h: 73 74 73 64 00 00 00 00 00 00 00 01 00 00 00 4B ; stsd...........K
    00015268h: 6D 70 34 61 00 00 00 00 00 00 00 01 00 00 00 00 ; mp4a............
    00015278h: 00 00 00 00 00 01 00 10 00 00 00 00 7D 00 00 00 ; ............}...
    00015288h: 00 00 00 27 65 73 64 73 00 00 00 00 03 19 00 00 ; ...'esds........
    00015298h: 00 04 11 40 15 00 00 D2 00 00 BB 88 00 00 7D 00 ; ...@...?.粓..}.
    000152a8h: 05 02 12 88 06 01 02                            ; ...?..

    0x12 0x88即私有数据(对应ffmpeg中AVCodecContext.extradata)
    下面是mp4音频部分分析的代码:

    //MP4Analyze.h

    /*
     * Byte type used throughout the parser.
     * This is a typedef rather than a macro: a macro named uint8_t rewrites the
     * declaration of uint8_t in any system header included afterwards (for
     * example <netinet/in.h>) and breaks the build. Repeating the typedef with
     * the same underlying type is accepted by C++ and by C11.
     */
    typedef unsigned char uint8_t;

    /******atom tag*******/
    /*
     * Four-character tags of the atoms the parser looks for. Each array holds
     * the tag plus a terminating NUL (5 bytes); only the first four bytes are
     * compared against file data.
     * The arrays are `static` so that every source file including this header
     * gets its own copy instead of producing duplicate-symbol link errors.
     */
    static uint8_t moov[] = "moov";
    static uint8_t trak[] = "trak";
    static uint8_t mdia[] = "mdia";
    static uint8_t minf[] = "minf";
    static uint8_t stbl[] = "stbl";
    static uint8_t stsd[] = "stsd";
    static uint8_t stsc[] = "stsc";
    static uint8_t stsz[] = "stsz";
    static uint8_t stco[] = "stco";
    static uint8_t ftyp[] = "ftyp";
    static uint8_t mdat[] = "mdat";

    /*
     * Leading fields of an atom. get_audio_info() casts the first bytes of an
     * 'stsd' atom to this struct and reads size and num_of_entries from it.
     */
    typedef struct Atom
    {
        unsigned int size;            // atom size as stored in the file; get_audio_info() converts it with ntohl()
        uint8_t tag[4];               // four-character atom tag
        int ver_flag;                 // presumably the version byte plus three flag bytes -- confirm against the format spec
        unsigned int num_of_entries;  // entry count as stored in the file; get_audio_info() converts it with ntohl()
        unsigned int pos;             // not referenced by the code shown here
        uint8_t *data;                // not referenced by the code shown here
    } Atom;


    /****audio format****/
    /*
     * Tags of audio formats and of the child atoms that carry decoder-specific
     * data. Like the atom tags they are `static` so the header can be included
     * from more than one source file without duplicate definitions.
     */
    static uint8_t kmp3[] = {0x6D,0x73,0x00,0x55};  // MPEG layer 3; contains a zero byte, so compare it with memcmp
    static uint8_t fmp3[] = ".mp3";
    static uint8_t raw[] = "raw ";

    static uint8_t wave[] = "wave";
    static uint8_t mp4a[] = "mp4a";
    static uint8_t enca[] = "enca";//encrypted to ISO/IEC 14496-12 or 3GPP standards

    // AMR narrowband. The variable keeps its historical (misspelled) name for
    // compatibility, but the tag stored in files is "samr".
    static uint8_t smar[] = "samr";

    static uint8_t sawb[] = "sawb";//AMR wideband

    static uint8_t m4ds[] = "m4ds";//alternative tag accepted in place of "esds"

    static uint8_t esds[] = "esds";
    // Original-format atom. The variable keeps its historical name, but the tag
    // stored in files is "frma".
    static uint8_t fram[] = "frma";

    /*** We may not need these ***/
    /*
     * Pack four characters into one 32-bit tag value, first character in the
     * lowest byte. The last operand is converted to unsigned before shifting so
     * that a character >= 0x80 does not shift into the sign bit of an int.
     */
    #define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
    /*
     * Pairs a codec id with a tag value. In the code shown here it is only
     * referenced by the commented-out codec_get_tag() lookup.
     */
    typedef struct AVCodecTag {
        int id;            // codec identifier
        unsigned int tag;  // tag value; presumably built with MKTAG -- confirm
    } AVCodecTag;


    /*
     * One audio sample-description entry of the 'stsd' atom.
     * get_audio_info() casts raw file bytes to this struct, so the fields hold
     * values in file (network) byte order and must be converted with
     * ntohl()/ntohs() before use. The struct must have no padding for that
     * overlay to be valid.
     */
    typedef struct stsdtable
    {
        unsigned int size;      // size of this entry
        char format[4];         // audio format tag; exactly four characters, not NUL-terminated
        int res1;               // presumably reserved bytes -- confirm against the format spec
        int ref;                // presumably reserved bytes plus the data-reference index -- confirm
        short version;          // entry version; get_audio_info() treats 0 as "no trailing fields"
        short pad1;
        int pad2;
        short channels;         // number of channels
        short bitspersample;    // bits per sample
        short compress_id;      // compression id
        short res2;
        short samplerate1;      // sample rate, printed by get_audio_info() as samplerate1.samplerate2
        short samplerate2;
        //{if(version==1)

            // get_audio_info() subtracts these 16 bytes from the header length when version == 0
            int sampleperpacket;
            int bytesperpacket;
            int bytesperframe;
            int bytespersample;
        //}

    } stsdtable;

    /***** result is stored here ******/
    /*
     * Per-sample result record. get_packet_offset() stores one of these for
     * every sample, keyed by the sample's file offset.
     */
    typedef struct sampletable
    {
        unsigned int size;      // sample size, read from the stsz table
        unsigned int id_of_sd;  // third 32-bit field of the stsc entry the sample belongs to (presumably the sample description id)
    } sampletable;
    //MP4Analyze.cpp


    // Standard and platform headers come first: MP4Analyze.h may define
    // uint8_t itself, and that definition must not be active while system
    // headers declare their own uint8_t.
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>
    #ifdef WIN32
    #include <winsock2.h> 
    #pragma comment(lib, "Ws2_32.lib")
    #pragma warning (disable:4786)
    #endif

    #ifdef __GNUG__
    #include <netinet/in.h>
    #endif
    #include "MP4Analyze.h"
    using namespace std;

    /**
    *** mp4存在宽度为8字节的wide atom tag,需要注意,这里暂未考虑
    **/


    /*
     * Check whether a buffer looks like the start of a mov/mp4/3gp file.
     *
     * The tag of the first atom (bytes 4..7) must be "moov", "ftyp" or "mdat".
     *
     * data: start of the file contents
     * size: number of valid bytes at data
     * returns 0 when the tag is recognised, -1 otherwise (including a NULL
     *         buffer or fewer than 8 bytes)
     */
    int check_format(uint8_t *data, int size)
    {
        static const char *const known_tags[] = { "moov", "ftyp", "mdat" };
        const unsigned int tag_count = sizeof(known_tags) / sizeof(known_tags[0]);

        /* The tag sits behind the 4-byte size field, so 8 bytes are required. */
        if(data == NULL || size < 8)
            return -1;

        for(unsigned int i = 0; i < tag_count; ++i)
        {
            /* memcmp, not strncmp: the data is binary and may contain zero bytes */
            if(memcmp(data + 4, known_tags[i], 4) == 0)
                return 0;
        }
        return -1;
    }

    /*
     * Decode the first `size` bytes at `data` as a big-endian unsigned integer.
     * A `size` of zero or less yields 0.
     */
    unsigned int get_size(const uint8_t *data,int size)
    {
        unsigned int value = 0;
        const uint8_t *cursor = data;
        int remaining = size;

        while(remaining > 0)
        {
            /* make room for the next byte, then place it in the low 8 bits */
            value = (value << 8) | *cursor;
            ++cursor;
            --remaining;
        }
        return value;
    }
    /* if found,return the offset from the data[0]*/
    int seek_tag(uint8_t tag[],uint8_t *data, unsigned int size1,uint8_t **pos,unsigned int *size2)
    {
        if(data == NULL || size1 == 0)
            return -1;
        unsigned int tag_size = get_size(data,4);
        if(tag_size >size1 + 8)
            return -1;
        unsigned int tmp = 0;
        while(strncmp((char*)data+4,(char*)tag,4) != 0)
        {
        //    printf("%s/n",data+4);

            if(tag_size==0)
                return -1;
            if(tag_size < size1 + 8)
            {
                data += tag_size;
                tmp += tag_size;
            }
            else
                return -1;
            tag_size = get_size(data,4);
        }
        printf("find :%c%c%c%c/n",tag[0],tag[1],tag[2],tag[3]);
        if(tmp + tag_size > size1 )
         printf("warning: the atom may be not complete!/n");
        *pos = data+8;
        *size2 = tag_size -8;
        return tmp;
    }
    /*** elementary stream descriptor analyse ***/
    /*
    unsigned int codec_get_tag(const AVCodecTag *tags, int id)
    {
        while (tags->id != CODEC_ID_NONE) {
            if (tags->id == id)
                return tags->tag;
            tags++;
        }
        return 0;
    }
    /* may not need analyse
    int esds_analyze(uint8_t *data, unsigned int size)
    {
        return 0;
    }
    */


    /*version == 2 ??? refer to ffmpeg source mov.c line 943
    if (format == MKTAG('l','p','c','m'))
            st->codec->codec_id = mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample, flags);
    */

    vector<stsdtable>& get_audio_info(uint8_t *data, unsigned int size, vector<stsdtable>& stable)//stsd

    {
        uint8_t * datapos = data;
        Atom *stsd_audio =(Atom *)data;
        int tmp_size = 16;

        printf("size : %u/n",ntohl(stsd_audio->size));
        printf("num_entr: %u/n",ntohl(stsd_audio->num_of_entries));

        for(int i=0; i < ntohl(stsd_audio->num_of_entries); ++i)
        {
            if(tmp_size > size)//注意

                return stable;
            datapos += tmp_size;
            stsdtable * audio_entry = (stsdtable *)(datapos);
            stable.push_back(*audio_entry);//这里存入的是网络序的数据,使用时需进行转换

            tmp_size += ntohl(audio_entry->size);

         /***************/
            printf("--tablesize: %d/n",ntohl(audio_entry->size));
            printf("--format : %s/n",audio_entry->format);
            printf("--version : %d/n",ntohs(audio_entry->version));
            printf("--channels: %d/n",ntohs(audio_entry->channels));
            printf("--bitpersam: %d/n",ntohs(audio_entry->bitspersample));
            printf("--IDcompress: %d/n",ntohs(audio_entry->compress_id));    
            printf("--samplerate: %d.%d/n",ntohs(audio_entry->samplerate1),ntohs(audio_entry->samplerate2));
            /**************/

         tmp_size = sizeof(stsdtable);
            if(ntohs(audio_entry->version)==0)
            {
                tmp_size -= 16;
            }
            datapos += tmp_size;
            //if(ntohs(audio_entry->compress_id)==-2)//此处尚需考证

            if(ntohl(audio_entry->size) > sizeof(stsdtable))
            {
                printf("----atom size:%d/n",get_size(datapos,4));
                printf("----atom name:%c%c%c%c/n",datapos[4],datapos[5],datapos[6],datapos[7]);
                if(strncmp((char*)datapos,(char*)esds,4)==0)
                {
                    //handle esds

                }
            }
        }
        return stable;
    }
    map<unsigned int,sampletable> & get_packet_offset(uint8_t *STBL[], map<unsigned int,sampletable>& table)
    {
        //table.insert(pair<long,sampletable>(1,sample));

        unsigned int num_sam_to_chunk = get_size(STBL[0]-4,4);//stsc

        unsigned int num_sample = get_size(STBL[1]-4,4);//stsz

        unsigned int num_chunk = get_size(STBL[2]-4,4);//stco

        unsigned int chunk_index = 0;
        unsigned int next_chunk_index = 0;
        uint8_t *cur_sam_to_chunk = STBL[0];
        uint8_t *cur_sam_size = STBL[1];
        uint8_t *cur_chunk_offset = STBL[2];
        sampletable sample;
        printf("number of stsc entries:%d /nnumber of sample size:%d /nnumber of chunk offset:%d/n",num_sam_to_chunk,num_sample,num_chunk);
        for(unsigned int i = 0; i < num_sam_to_chunk; ++i)//对所有的entries

        {
            chunk_index = get_size(cur_sam_to_chunk,4);
            next_chunk_index = get_size(cur_sam_to_chunk+12,4);
            sample.id_of_sd = get_size(cur_sam_to_chunk+8,4);
            if(== num_sam_to_chunk -1)//最后一个

            {
                next_chunk_index = num_chunk+1;
            } 
            printf("chunk_index:(%d---%d)/n",chunk_index,next_chunk_index);
            for(unsigned int k=chunk_index; k < next_chunk_index; ++k)//当前chunk序号到下一个chunk序号之间的chunk

            {//处理所有重复的chunk

                printf("chunk_index:%d sample num:%d/n",chunk_index,get_size(cur_sam_to_chunk+4,4));
                unsigned int offset = get_size(cur_chunk_offset+(chunk_index-1)*4,4);
                for(unsigned int j=0; j < get_size(cur_sam_to_chunk+4,4); ++j)//chunk内地sample数目

                {//处理该chunk中的sample

                    sample.size = get_size(cur_sam_size,4);    
                    printf("--sample offset:%d %x size:%d/n",offset,offset,sample.size);
                    table.insert(pair<unsigned int,sampletable>(offset,sample));
                    offset = offset + sample.size;
                    cur_sam_size += 4;
                }
                system("pause");
                chunk_index++;
            }
            cur_sam_to_chunk += 12;
        }
        return table;
    }

    int seek_audio_atom( uint8_t *data1, unsigned int size1)
    {
        uint8_t tag[] = "mdiaminfsmhd";
        uint8_t *datapos;
        unsigned int tag_size;
        uint8_t *data;
        unsigned int size;
        int offset_of_atom = 0;
        if((offset_of_atom = seek_tag(moov, data1, size1, &data, &size)) == -1)
            return -1;
        if(offset_of_atom + size >size1)
        { //some handles

            printf("moov atom is not complete,need more data");
        }
        data1 = data;
        size1 = size;
        uint8_t *nexttrak = data;
        unsigned int traksize = size;
        int i=0;
        while(1)
        {
            printf("-----/n");
            if(seek_tag(trak, nexttrak, traksize, &datapos, &tag_size) != -1)
            {
                nexttrak = datapos + tag_size;
                if(size1 < (nexttrak - data1))
                    return -1;
                traksize = size1 - (nexttrak - data1);
                data = datapos;
                size = tag_size;
            } 
            else
            {
                return -1;
            }
            i=0;
            while(i<3)
            {
                if(seek_tag(tag+i*4, data, size, &datapos, &tag_size) != -1)
                {
                    if(i==2)
                     break;
                    data = datapos;
                    size = tag_size;
                    ++i;
                }
                else
                {
                    break;
                }
            }
            if(strncmp("smhd",(char*)(datapos-4),4) == 0)
            {
                if(seek_tag(stbl, data, size, &datapos, &tag_size)!= -1)
                {
                    printf("—find audio stbl—!/n");
                    data = datapos;
                    size = tag_size;

                    if(seek_tag(stsd, data, size, &datapos, &tag_size) != -1)
                    {
                        vector<stsdtable> stable; //音频信息

                        get_audio_info(datapos-8, tag_size,stable);
                    }

                    uint8_t *STBL[3] ={NULL,NULL,NULL};//

                    uint8_t *datapos1;
                    unsigned int tag_size1;//

                    if(seek_tag(stsc, data, size, &datapos1, &tag_size1) != -1)
                    {
                        STBL[0] = datapos1 + 8;
                    }
                    uint8_t *datapos2;
                    unsigned int tag_size2;
                    if(seek_tag(stsz, data, size, &datapos2, &tag_size2) != -1)
                    {
                        STBL[1] = datapos2 + 12;
                    }
                    uint8_t *datapos3;
                    unsigned int tag_size3;
                    if(seek_tag(stco, data, size, &datapos3, &tag_size3) != -1)
                    {
                        STBL[2] = datapos3 + 8;
                    }
                    if(STBL[0] && STBL[1] && STBL[2] )
                    {
                        map<unsigned int,sampletable> postable;//音频帧信息

                        get_packet_offset(STBL,postable);
                    }
                }
                return 0;
            }
        }
        return -1;
    }
    int main(char arg, char *argv[])
    {
        FILE *mp4;
        cout<<"please input the file name :"<<endl;
        string filename;
        cin>>filename;
        mp4 = fopen(filename.c_str(),"rb");
        uint8_t buffer[300000];
        fread(buffer,1,300000,mp4);

        seek_audio_atom((uint8_t*)buffer,300000);

        fclose(mp4);
        return 0;
    }


  • 相关阅读:
    redis使用基础(十) ——Redis存储Session
    redis使用基础(十一) ——Redis特殊情况处理机制
    redis使用基础(八) ——Redis命令属性
    redis使用基础(九) ——Redis虚拟内存
    性能
    laradock phpstorm xdebug
    docker笔记
    go注意点
    微服务
    分布式
  • 原文地址:https://www.cnblogs.com/eustoma/p/2415769.html
Copyright © 2020-2023  润新知