1. Audio encoding
In recent versions of the FFmpeg library, the built-in AAC encoder requires sample_fmt to be AV_SAMPLE_FMT_FLTP, whereas Android's ENCODING_PCM_16BIT corresponds to AV_SAMPLE_FMT_S16. For mono audio the two differ very little: either way all samples sit in AVFrame->data[0], the only difference being that AV_SAMPLE_FMT_FLTP stores them as floating-point values while AV_SAMPLE_FMT_S16 stores them as signed 16-bit integers. For stereo audio, however, the difference is substantial.
AV_SAMPLE_FMT_S16 is packed (interleaved): the left and right channel samples are stored contiguously in a single array as LRLR.... AV_SAMPLE_FMT_FLTP is planar: the left and right channels are kept in two separate arrays, one plane per channel.
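To make the two layouts concrete, here is a small hand-rolled illustration (a sketch only; it assumes stereo input, and in practice the swr_convert call described below does this work for you, plus resampling):

#include <stdint.h>
#include <stddef.h>

/* Interleaved S16 keeps both channels in one array (L0 R0 L1 R1 ...);
 * planar float (FLTP) keeps each channel in its own contiguous array.
 * This loop de-interleaves and rescales int16 samples to floats in [-1, 1). */
static void s16_interleaved_to_fltp(const int16_t *interleaved,
                                    float *left, float *right,
                                    size_t nb_samples)
{
    for (size_t i = 0; i < nb_samples; i++) {
        left[i]  = interleaved[2 * i]     / 32768.0f;  /* would become data[0] */
        right[i] = interleaved[2 * i + 1] / 32768.0f;  /* would become data[1] */
    }
}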
So how can audio recorded on Android with CHANNEL_IN_STEREO (stereo) and ENCODING_PCM_16BIT (i.e. AV_SAMPLE_FMT_S16) be encoded by FFmpeg into an AAC file using AV_SAMPLE_FMT_FLTP? FFmpeg already provides an interface for exactly this: the swr_convert function. The conversion works as follows:
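Here is a minimal sketch of that conversion, assuming stereo in and out at the same sample rate and the pre-5.1 channel-layout API (the same generation of FFmpeg API the player code below uses); create_s16_to_fltp_ctx, in_pcm and out_frame are placeholder names, and error handling is reduced to return codes:

#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>
#include <libswresample/swresample.h>

/* Build a resampler that turns interleaved S16 stereo into planar float. */
static SwrContext *create_s16_to_fltp_ctx(int sample_rate)
{
    SwrContext *swr = swr_alloc_set_opts(NULL,
            AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLTP, sample_rate,   /* output */
            AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16,  sample_rate,   /* input  */
            0, NULL);
    if (!swr || swr_init(swr) < 0) {
        swr_free(&swr);
        return NULL;
    }
    return swr;
}

/* Convert one block of interleaved S16 samples into an FLTP AVFrame
 * (data[0] = left plane, data[1] = right plane) ready for the AAC encoder. */
static int convert_to_fltp(SwrContext *swr, const uint8_t *in_pcm,
                           int nb_samples, AVFrame *out_frame)
{
    out_frame->format         = AV_SAMPLE_FMT_FLTP;
    out_frame->channel_layout = AV_CH_LAYOUT_STEREO;
    out_frame->nb_samples     = nb_samples;
    if (av_frame_get_buffer(out_frame, 0) < 0)
        return -1;

    /* swr_convert takes arrays of plane pointers; interleaved input is one plane. */
    const uint8_t *in_planes[1] = { in_pcm };
    return swr_convert(swr, out_frame->data, nb_samples, in_planes, nb_samples);
}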
https://www.bilibili.com/video/BV1jz4y1Z7pf?p=1
https://blog.csdn.net/XIAIBIANCHENG/article/details/72810495
https://www.jianshu.com/p/be78830eefbd
2. Audio rendering
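The method below is the decode/playback side of the same problem: it takes a decoded AVFrame, resamples it to interleaved S16 with swr_convert when the source format, rate or channel count doesn't match the output device, converts the samples to normalized floats with vDSP, and wraps the result in an AudioFrame carrying position and duration for the playback queue.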
- (AudioFrame *) handleAudioFrame
{
    if (!_audioFrame->data[0])
        return nil;

    id<AudioManager> audioManager = [AudioManager audioManager];
    const NSUInteger numChannels = audioManager.numOutputChannels;
    NSInteger numFrames;
    void *audioData;

    if (_swrContext) {
        // Worst-case growth of the output buffer when resampling:
        // sample-rate ratio * channel-count ratio * 2 for headroom.
        const NSUInteger ratio = MAX(1, audioManager.samplingRate / _audioCodecCtx->sample_rate) *
                                 MAX(1, audioManager.numOutputChannels / _audioCodecCtx->channels) * 2;

        const int bufSize = av_samples_get_buffer_size(NULL,
                                                       audioManager.numOutputChannels,
                                                       _audioFrame->nb_samples * ratio,
                                                       AV_SAMPLE_FMT_S16,
                                                       1);

        if (!_swrBuffer || _swrBufferSize < bufSize) {
            _swrBufferSize = bufSize;
            _swrBuffer = realloc(_swrBuffer, _swrBufferSize);
        }

        // Output is interleaved S16, so only one output plane is needed.
        Byte *outbuf[2] = { _swrBuffer, 0 };

        numFrames = swr_convert(_swrContext,
                                outbuf,
                                _audioFrame->nb_samples * ratio,
                                (const uint8_t **)_audioFrame->data,
                                _audioFrame->nb_samples);
        if (numFrames < 0) {
            LoggerAudio(0, @"fail resample audio");
            return nil;
        }

        //int64_t delay = swr_get_delay(_swrContext, audioManager.samplingRate);
        //if (delay > 0)
        //    LoggerAudio(0, @"resample delay %lld", delay);

        audioData = _swrBuffer;

    } else {
        if (_audioCodecCtx->sample_fmt != AV_SAMPLE_FMT_S16) {
            NSAssert(false, @"bucheck, audio format is invalid");
            return nil;
        }
        audioData = _audioFrame->data[0];
        numFrames = _audioFrame->nb_samples;
    }

    // Convert the interleaved S16 samples to floats normalized to [-1, 1]
    // using the Accelerate framework.
    const NSUInteger numElements = numFrames * numChannels;
    NSMutableData *data = [NSMutableData dataWithLength:numElements * sizeof(float)];

    float scale = 1.0 / (float)INT16_MAX;
    vDSP_vflt16((SInt16 *)audioData, 1, data.mutableBytes, 1, numElements);
    vDSP_vsmul(data.mutableBytes, 1, &scale, data.mutableBytes, 1, numElements);

    AudioFrame *frame = [[AudioFrame alloc] init];
    frame.position = av_frame_get_best_effort_timestamp(_audioFrame) * _audioTimeBase;
    frame.duration = av_frame_get_pkt_duration(_audioFrame) * _audioTimeBase;
    frame.samples = data;

    if (frame.duration == 0) {
        // sometimes ffmpeg can't determine the duration of audio frame
        // especially of wma/wmv format
        // so in this case must compute duration
        frame.duration = frame.samples.length / (sizeof(float) * numChannels * audioManager.samplingRate);
    }

#if DEBUG
    LoggerAudio(2, @"AFD: %.4f %.4f | %.4f ",
                frame.position,
                frame.duration,
                frame.samples.length / (8.0 * 44100.0));
#endif

    return frame;
}
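The method relies on an already-initialized _swrContext, whose creation is not shown above. A minimal sketch of how such a context is typically set up for this rendering path (decoded format in, interleaved S16 at the device's rate and channel count out; this is an assumption based on how the method consumes the buffer, not code from the original source):

id<AudioManager> audioManager = [AudioManager audioManager];

// Sketch: resample whatever the decoder produces into interleaved S16 at the
// output device's sample rate and channel count, which is what handleAudioFrame
// expects to find in _swrBuffer.
_swrContext = swr_alloc_set_opts(NULL,
        av_get_default_channel_layout(audioManager.numOutputChannels),
        AV_SAMPLE_FMT_S16,
        audioManager.samplingRate,
        av_get_default_channel_layout(_audioCodecCtx->channels),
        _audioCodecCtx->sample_fmt,
        _audioCodecCtx->sample_rate,
        0, NULL);

if (!_swrContext || swr_init(_swrContext) < 0) {
    swr_free(&_swrContext);
    // Without a resampler, only raw AV_SAMPLE_FMT_S16 frames can be played directly.
}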
https://blog.csdn.net/xwl198937/article/details/52386331