• 音频自动增益 与 静音检测 算法 附完整C代码【转】


    转自:https://www.cnblogs.com/cpuimage/p/8908551.html

    前面分享过一个算法《音频增益响度分析 ReplayGain 附完整C代码示例》,

    主要用于评估一定长度音频的音量强度,

    而分析之后,很多类似的需求,肯定是做音频增益,提高音量诸如此类做法。

    不过在项目实测的时候,其实真的很难定标准,

    到底在什么样的环境下,要增大音量,还是降低。

    在通讯行业一般的做法就是采用静音检测,

    一旦检测为静音或者噪音,则不做处理,反之通过一定的策略进行处理。

    这里就涉及到两个算法,一个是静音检测,一个是音频增益。

    增益其实没什么好说的,类似于数据归一化拉伸的做法。

    静音检测 在WebRTC中 是采用计算GMM (Gaussian Mixture Model,高斯混合模型)进行特征提取的。

    在很长一段时间里面,音频特征 有3个主要的方法,

    GMM  ,Spectrogram (声谱图), MFCC 即 Mel-Frequency Cepstrum(Mel频率倒谱)

    恕我直言,GMM 提取的特征,其鲁棒性 不如后两者。

    也不多做介绍,感兴趣的同学,翻翻 维基百科 ,补补课。

    当然在实际使用算法时,会由此延伸出来一些小技巧。

    例如,用静音检测 来做音频裁剪,或者搭配音频增益做一些音频增强之类的操作。

    自动增益在WebRTC 源代码文件是:analog_agc.c 和 digital_agc.c

    静音检测 源代码文件是: webrtc_vad.c

    这个命名,有一定的历史原因了。

    经过梳理后,

    增益算法为 agc.c agc.h

    静音检测为 vad.c vad.h

    增益算法的完整示例代码:

    复制代码
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <string.h>
    //采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
    #define DR_WAV_IMPLEMENTATION
    #include "dr_wav.h"
    #include "agc.h"
    
    // Fallback so the code below can spell the null pointer as `nullptr`;
    // only defined when no macro of that name exists yet.
    #ifndef nullptr
    #define nullptr 0
    #endif
    
    // Smaller of two values. Function-like macro: each argument may be
    // evaluated twice, so do not pass expressions with side effects.
    #ifndef MIN
    #define  MIN(A, B)        ((A) < (B) ? (A) : (B))
    #endif
    
    //写wav文件
    void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) {
        drwav_data_format format = {};
        format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
        format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
        format.channels = 1;
        format.sampleRate = (drwav_uint32) sampleRate;
        format.bitsPerSample = 16;
        drwav *pWav = drwav_open_file_write(filename, &format);
        if (pWav) {
            drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
            drwav_uninit(pWav);
            if (samplesWritten != totalSampleCount) {
                fprintf(stderr, "ERROR
    ");
                exit(1);
            }
        }
    }
    
    // Read a whole WAV file as 16-bit samples.
    //
    //   filename          path of the file to read
    //   sampleRate        out: sample rate of the file
    //   totalSampleCount  out: number of samples returned
    //
    // Only single-channel audio is accepted. Returns the sample buffer, or NULL
    // when the file cannot be read or is not mono; on NULL both out-parameters
    // are set to 0. A non-NULL result is released with drwav_free().
    int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
        unsigned int channels = 0;
        int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
        if (buffer == NULL) {
            printf("读取wav文件失败.");
            // Return here: `channels` carries no meaningful value after a failed read.
            *sampleRate = 0;
            *totalSampleCount = 0;
            return NULL;
        }
        // Only mono audio is processed.
        if (channels != 1) {
            drwav_free(buffer);
            *sampleRate = 0;
            *totalSampleCount = 0;
            return NULL;
        }
        return buffer;
    }
    
    // Split a path into drive, directory, base name and extension.
    //
    //   path  NUL-terminated input path; '\\' and '/' both count as separators
    //   drv   out (may be NULL): "X:" when the path starts with a drive, else ""
    //   dir   out (may be NULL): everything up to and including the last separator
    //   name  out (may be NULL): file name without the extension
    //   ext   out (may be NULL): extension including the leading '.', or ""
    //
    // The output buffers carry no size: drv needs 3 bytes and each of the others
    // must be able to hold strlen(path) + 1 bytes.
    void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
        const char *end;
        const char *p;
        const char *s;

        if (path[0] && path[1] == ':') {
            if (drv) {
                drv[0] = path[0];
                drv[1] = path[1];
                drv[2] = '\0';
            }
            // Skip the drive prefix even when the caller does not want it,
            // otherwise the ':' would end up in the other components.
            path += 2;
        } else if (drv) {
            *drv = '\0';
        }

        // The path proper ends at the terminating NUL or at a ':'.
        for (end = path; *end && *end != ':';) {
            end++;
        }

        // Look for an extension: the last '.' after the last separator.
        for (p = end; p > path && *--p != '\\' && *p != '/';) {
            if (*p == '.') {
                end = p;
                break;
            }
        }
        if (ext) {
            for (s = end; (*ext = *s++);) {
                ext++;
            }
        }

        // Walk back to the character following the last separator.
        for (p = end; p > path;) {
            if (*--p == '\\' || *p == '/') {
                p++;
                break;
            }
        }
        if (name) {
            for (s = p; s < end;) {
                *name++ = *s++;
            }
            *name = '\0';
        }
        if (dir) {
            for (s = path; s < p;) {
                *dir++ = *s++;
            }
            *dir = '\0';
        }
    }
    
    
    // Run WebRTC automatic gain control over a sample buffer, in place.
    //
    //   buffer        samples to process; overwritten with the processed output
    //   sampleRate    sample rate of `buffer` in Hz
    //   samplesCount  number of samples in `buffer`
    //   agcMode       mode value handed to WebRtcAgc_Init
    //
    // The buffer is processed in frames of MIN(160, sampleRate / 100) samples.
    // Samples after the last whole frame are left untouched.
    // Returns 1 on success and -1 on invalid arguments or any WebRtcAgc_* failure.
    int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
        enum { kMaxSamples = 320 };  // capacity of the per-frame output buffer

        if (buffer == NULL) return -1;
        if (samplesCount == 0) return -1;

        WebRtcAgcConfig agcConfig;
        agcConfig.compressionGaindB = 9; // default 9 dB
        agcConfig.limiterEnable = 1; // default kAgcTrue (on)
        agcConfig.targetLevelDbfs = 3; // default 3 (-3 dBOv)
        int minLevel = 0;
        int maxLevel = 255;

        size_t samples = MIN(160, sampleRate / 100);
        if (samples == 0) return -1;
        int16_t *input = buffer;
        size_t nTotal = (samplesCount / samples);

        void *agcInst = WebRtcAgc_Create();
        if (agcInst == NULL) return -1;
        int status = WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate);
        if (status != 0) {
            printf("WebRtcAgc_Init fail\n");
            WebRtcAgc_Free(agcInst);
            return -1;
        }
        status = WebRtcAgc_set_config(agcInst, agcConfig);
        if (status != 0) {
            printf("WebRtcAgc_set_config fail\n");
            WebRtcAgc_Free(agcInst);
            return -1;
        }

        size_t num_bands = 1;
        int inMicLevel, outMicLevel = -1;
        int16_t out_buffer[kMaxSamples];
        int16_t *out16 = out_buffer;
        // Written by WebRtcAgc_Process; per the original author's note it flags
        // saturation after the gain has been applied.
        uint8_t saturationWarning = 1;
        // Per the original author's note: whether the gain should account for echo.
        int16_t echo = 0;

        // size_t index: nTotal is a size_t and may exceed INT_MAX.
        for (size_t i = 0; i < nTotal; i++) {
            inMicLevel = 0;
            int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples,
                                            (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo,
                                            &saturationWarning);
            if (nAgcRet != 0) {
                printf("failed in WebRtcAgc_Process\n");
                WebRtcAgc_Free(agcInst);
                return -1;
            }
            // Copy the processed frame back over the input frame.
            memcpy(input, out_buffer, samples * sizeof(int16_t));
            input += samples;
        }
        WebRtcAgc_Free(agcInst);
        return 1;
    }
    
    // Load a mono WAV file, apply automatic gain control and save the result.
    //
    //   in_file   path of the WAV file to read
    //   out_file  path of the WAV file to write
    //
    // Nothing is written when the input cannot be loaded or when gain
    // processing fails.
    void auto_gain(char *in_file, char *out_file) {
        // Sample rate of the input.
        uint32_t sampleRate = 0;
        // Total number of samples in the input.
        uint64_t inSampleCount = 0;
        int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
        if (inBuffer == NULL) {
            return;
        }
        //  kAgcModeAdaptiveAnalog  analog volume adjustment
        //  kAgcModeAdaptiveDigital adaptive gain
        //  kAgcModeFixedDigital    fixed gain
        if (agcProcess(inBuffer, sampleRate, (size_t) inSampleCount, kAgcModeAdaptiveDigital) == 1) {
            wavWrite_int16(out_file, inBuffer, sampleRate, (size_t) inSampleCount);
        } else {
            // Do not save a buffer that was only partially processed.
            fprintf(stderr, "gain processing failed, %s not written\n", out_file);
        }
        // The buffer comes from dr_wav, so release it the same way wavRead_int16 does.
        drwav_free(inBuffer);
    }
    
    // Entry point of the AGC demo: takes the input WAV path as argv[1] and writes
    // the processed audio next to it as "<name>_out<ext>".
    // Returns -1 when no path is given or the path is too long, 0 otherwise.
    int main(int argc, char *argv[]) {
        printf("WebRTC Automatic Gain Control\n");
        printf("博客:http://cpuimage.cnblogs.com/\n");
        printf("音频自动增益\n");
        if (argc < 2)
            return -1;
        char *in_file = argv[1];
        char drive[3];
        char dir[256];
        char fname[256];
        char ext[256];
        char out_file[1024];
        // splitpath() writes without bounds, and every component it produces is a
        // piece of in_file, so limiting the whole path keeps all buffers safe.
        if (strlen(in_file) >= sizeof(fname)) {
            fprintf(stderr, "input path too long\n");
            return -1;
        }
        splitpath(in_file, drive, dir, fname, ext);
        snprintf(out_file, sizeof(out_file), "%s%s%s_out%s", drive, dir, fname, ext);
        auto_gain(in_file, out_file);

        printf("按任意键退出程序 \n");
        getchar();
        return 0;
    }
    复制代码

     静音检测完整示例代码:

    复制代码
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    //采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码
    #define DR_WAV_IMPLEMENTATION
    
    #include "dr_wav.h"
    #include "vad.h"
    
    // Fallback so the code below can spell the null pointer as `nullptr`;
    // only defined when no macro of that name exists yet.
    #ifndef nullptr
    #define nullptr 0
    #endif
    
    // Smaller of two values. Function-like macro: each argument may be
    // evaluated twice, so do not pass expressions with side effects.
    #ifndef MIN
    #define  MIN(A, B)        ((A) < (B) ? (A) : (B))
    #endif
    
    // Larger of two values; same double-evaluation caveat as MIN.
    #ifndef MAX
    #define  MAX(A, B)        ((A) > (B) ? (A) : (B))
    #endif
    
    
    // Read a whole WAV file as 16-bit samples.
    //
    //   filename          path of the file to read
    //   sampleRate        out: sample rate of the file
    //   totalSampleCount  out: number of samples returned
    //
    // Only single-channel audio is accepted. Returns the sample buffer, or NULL
    // when the file cannot be read or is not mono; on NULL both out-parameters
    // are set to 0. A non-NULL result is released with drwav_free().
    int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
        unsigned int channels = 0;
        int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
        if (buffer == NULL) {
            printf("读取wav文件失败.");
            // Return here: `channels` carries no meaningful value after a failed read.
            *sampleRate = 0;
            *totalSampleCount = 0;
            return NULL;
        }
        // Only mono audio is processed.
        if (channels != 1) {
            drwav_free(buffer);
            *sampleRate = 0;
            *totalSampleCount = 0;
            return NULL;
        }
        return buffer;
    }
    
    
    // Run WebRTC voice activity detection over a sample buffer and print the
    // result of every frame to stdout.
    //
    //   buffer         samples to analyse
    //   sampleRate     sample rate of `buffer` in Hz
    //   samplesCount   number of samples in `buffer`
    //   vad_mode       mode value handed to WebRtcVad_set_mode
    //   per_ms_frames  frame length in milliseconds; clamped to the range 10..30
    //
    // Samples after the last whole frame are not analysed.
    // Returns 1 on success and -1 on invalid arguments or any WebRtcVad_* failure.
    int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
        if (buffer == NULL) return -1;
        if (samplesCount == 0) return -1;
        // kValidRates : 8000, 16000, 32000, 48000
        // 10, 20 or 30 ms frames
        // The clamp only bounds the range; it does not round to 10/20/30.
        per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
        size_t samples = sampleRate * per_ms_frames / 1000;
        if (samples == 0) return -1;
        int16_t *input = buffer;
        size_t nTotal = (samplesCount / samples);

        void *vadInst = WebRtcVad_Create();
        if (vadInst == NULL) return -1;
        int status = WebRtcVad_Init(vadInst);
        if (status != 0) {
            printf("WebRtcVad_Init fail\n");
            WebRtcVad_Free(vadInst);
            return -1;
        }
        status = WebRtcVad_set_mode(vadInst, vad_mode);
        if (status != 0) {
            printf("WebRtcVad_set_mode fail\n");
            WebRtcVad_Free(vadInst);
            return -1;
        }
        printf("Activity : \n");
        // size_t index: nTotal is a size_t and may exceed INT_MAX.
        for (size_t i = 0; i < nTotal; i++) {
            int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
            if (nVadRet == -1) {
                printf("failed in WebRtcVad_Process\n");
                WebRtcVad_Free(vadInst);
                return -1;
            }
            // Print the per-frame result.
            printf(" %d \t", nVadRet);
            input += samples;
        }
        printf("\n");
        WebRtcVad_Free(vadInst);
        return 1;
    }
    
    // Load a mono WAV file and print its voice-activity result frame by frame.
    //
    //   in_file  path of the WAV file to analyse
    //
    // Does nothing when the file cannot be loaded.
    void vad(char *in_file) {
        // Sample rate of the input.
        uint32_t sampleRate = 0;
        // Total number of samples in the input.
        uint64_t inSampleCount = 0;
        int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
        if (inBuffer == NULL) {
            return;
        }
        // Aggressiveness mode (0, 1, 2, or 3)
        int16_t mode = 1;
        int per_ms = 30;
        // vadProcess prints its own messages when a WebRtcVad_* call fails.
        (void) vadProcess(inBuffer, sampleRate, (size_t) inSampleCount, mode, per_ms);
        // The buffer comes from dr_wav, so release it the same way wavRead_int16 does.
        drwav_free(inBuffer);
    }
    
    // Entry point of the VAD demo: takes the input WAV path as argv[1] and prints
    // the detection result of every frame. Returns -1 when no path is given.
    int main(int argc, char *argv[]) {
        printf("WebRTC Voice Activity Detector\n");
        printf("博客:http://cpuimage.cnblogs.com/\n");
        printf("静音检测\n");
        if (argc < 2)
            return -1;
        char *in_file = argv[1];
        vad(in_file);
        printf("按任意键退出程序 \n");
        getchar();
        return 0;
    }
    复制代码

    自动增益项目地址:https://github.com/cpuimage/WebRTC_AGC

    具体流程为: 

    加载wav(拖放wav文件到可执行文件上)->增益处理->保存为_out.wav文件

    静音检测项目地址:https://github.com/cpuimage/WebRTC_VAD

    具体流程为: 

    加载wav(拖放wav文件到可执行文件上)->输出静音检测结果

    备注 :1 为非静音,0 为静音

     该注意的地方和参数,见代码注释。

    用cmake即可进行编译示例代码,详情见CMakeLists.txt。

    若有其他相关问题或者需求也可以邮件联系俺探讨。

    邮箱地址是: 
    gaozhihan@vip.qq.com

  • 相关阅读:
    [SCOI2016] 幸运数字
    [CF438E] 小朋友和二叉树
    【题解】[AHOI2013]作业
    【题解】CF940F Machine Learning
    【题解】CF1207E XOR Guessing
    【题解】CF1228D Complete Tripartite
    【题解】CF1290B Irreducible Anagrams
    【题解】[JSOI2007]字符加密
    【题解】[SDOI2016]征途
    【题解】多边形染色
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/8910156.html
Copyright © 2020-2023  润新知