首先需要从麦克风采集音频,采集部分的代码可以参考微软官方示例:WASAPI Capture Shared Event Driven。
官方示例默认采集10秒音频。我们需要在其WriteWaveFile函数的开头添加以下代码,把采集到的原始音频数据(不含wav头)另存为utterance.raw。
// Write the contents of a WAV file. We take as input the data to write and the format of that data. // bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat) { FILE* _file; int16_t* _data; _data = (int16_t*)Buffer; _file = fopen("utterance.raw", "wb +"); fwrite(_data, 1, BufferSize, _file); fclose(_file); ...
得到utterance.raw原始音频样本之后,我们需要为其添加wav的头结构来重新生成wav文件。下面的程序读取该文件,并在音频数据前写入wav头(RIFF、fmt、data三个部分):
#include <Windows.h>
#include <stdio.h>
#include <MMDeviceAPI.h>
#include <AudioClient.h>
#include <assert.h>
#include <avrt.h>
#include <strsafe.h>
#include <fstream>
#include <memory>
#include <new>
#include <vector>
using namespace std;
#pragma warning(disable:4996)

//
//  Fixed-size leading part of a WAV file: the RIFF chunk header followed by
//  the tag and length of the "fmt " chunk.
//
struct WAVEHEADER
{
    DWORD dwRiff;       // "RIFF"
    DWORD dwSize;       // Size of everything that follows this field
    DWORD dwWave;       // "WAVE"
    DWORD dwFmt;        // "fmt "
    DWORD dwFmtSize;    // Wave Format Size
};

//  Template for WAVEHEADER with both length fields zeroed; they are filled in
//  once the total size is known.
const BYTE WaveHeader[] =
{
    'R', 'I', 'F', 'F', 0x00, 0x00, 0x00, 0x00,
    'W', 'A', 'V', 'E',
    'f', 'm', 't', ' ', 0x00, 0x00, 0x00, 0x00
};

//  Tag of the chunk that carries the audio samples.
const BYTE WaveData[] = { 'd', 'a', 't', 'a' };

static_assert(sizeof(WaveHeader) == sizeof(WAVEHEADER), "WaveHeader template must match WAVEHEADER layout");

//
//  Write the contents of a WAV file.  We take as input the data to write and
//  the format of that data.
//
//  FileHandle  - handle opened for writing; the whole file is emitted with a
//                single WriteFile call.
//  Buffer      - raw audio bytes (may be NULL only when BufferSize is 0).
//  BufferSize  - number of bytes in Buffer.
//  WaveFormat  - format description; sizeof(WAVEFORMATEX) + cbSize bytes are
//                read from this pointer, so cbSize must describe memory that
//                really follows the structure.
//
//  Returns true when the complete file was written, false otherwise (a
//  message is printed on failure).
//
bool WriteWaveFile(HANDLE FileHandle, const BYTE* Buffer, const size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    if (WaveFormat == NULL || (Buffer == NULL && BufferSize != 0))
    {
        printf("Invalid arguments passed to WriteWaveFile\n");
        return false;
    }

    const size_t formatSize = sizeof(WAVEFORMATEX) + WaveFormat->cbSize;
    const size_t headerSize = sizeof(WAVEHEADER) + formatSize + sizeof(WaveData) + sizeof(DWORD);

    //
    //  Every length stored in a RIFF file is 32 bits wide, so refuse input
    //  that would silently wrap when narrowed to DWORD.
    //
    if (BufferSize > MAXDWORD - headerSize)
    {
        printf("Audio data of %llu bytes is too large for a WAV file\n", static_cast<unsigned long long>(BufferSize));
        return false;
    }

    const DWORD waveFileSize = static_cast<DWORD>(headerSize + BufferSize);

    std::unique_ptr<BYTE[]> waveFileData(new (std::nothrow) BYTE[waveFileSize]);
    if (!waveFileData)
    {
        printf("Unable to allocate %lu bytes to hold output wave data\n", waveFileSize);
        return false;
    }

    BYTE* waveFilePointer = waveFileData.get();

    //
    //  Build the header locally, patch in the sizes, then copy it out.
    //
    WAVEHEADER waveHeader;
    CopyMemory(&waveHeader, WaveHeader, sizeof(waveHeader));
    waveHeader.dwSize = static_cast<DWORD>(waveFileSize - (2 * sizeof(DWORD)));
    waveHeader.dwFmtSize = static_cast<DWORD>(formatSize);
    CopyMemory(waveFilePointer, &waveHeader, sizeof(waveHeader));
    waveFilePointer += sizeof(waveHeader);

    //
    //  Next copy in the WaveFormatex structure (plus its extension bytes).
    //
    CopyMemory(waveFilePointer, WaveFormat, formatSize);
    waveFilePointer += formatSize;

    //
    //  Then the data header.  The length is copied byte-wise because this
    //  position is not DWORD-aligned (the format structure is 18 bytes).
    //
    CopyMemory(waveFilePointer, WaveData, sizeof(WaveData));
    waveFilePointer += sizeof(WaveData);

    const DWORD dataSize = static_cast<DWORD>(BufferSize);
    CopyMemory(waveFilePointer, &dataSize, sizeof(dataSize));
    waveFilePointer += sizeof(dataSize);

    //
    //  And finally copy in the audio data.
    //
    if (BufferSize != 0)
    {
        CopyMemory(waveFilePointer, Buffer, BufferSize);
    }

    //
    //  Last but not least, write the data to the file.
    //
    DWORD bytesWritten = 0;
    if (!WriteFile(FileHandle, waveFileData.get(), waveFileSize, &bytesWritten, NULL))
    {
        printf("Unable to write wave file: %lu\n", GetLastError());
        return false;
    }

    if (bytesWritten != waveFileSize)
    {
        printf("Failed to write entire wave file\n");
        return false;
    }
    return true;
}

//
//  Write the captured wave data to an output file so that it can be examined
//  later.  The file is named "WASAPICaptureEventDriven-<GUID>.WAV" (relative
//  path), using a freshly generated GUID so existing files are not clobbered.
//
//  CaptureBuffer - raw audio bytes.
//  BufferSize    - number of bytes in CaptureBuffer.
//  WaveFormat    - format of the audio; forwarded to WriteWaveFile.
//
void SaveWaveData(BYTE* CaptureBuffer, size_t BufferSize, WAVEFORMATEX* WaveFormat)
{
    wchar_t waveFileName[MAX_PATH];

    HRESULT hr = StringCbCopy(waveFileName, sizeof(waveFileName), L"WASAPICaptureEventDriven-");
    if (SUCCEEDED(hr))
    {
        GUID testGuid;
        hr = CoCreateGuid(&testGuid);
        if (SUCCEEDED(hr))
        {
            wchar_t* guidString = NULL;
            hr = StringFromCLSID(testGuid, &guidString);
            if (SUCCEEDED(hr))
            {
                hr = StringCbCat(waveFileName, sizeof(waveFileName), guidString);
                //  The GUID text has been copied (or rejected); release it now
                //  so no later exit path has to remember to.
                CoTaskMemFree(guidString);
            }
        }
    }
    if (SUCCEEDED(hr))
    {
        hr = StringCbCat(waveFileName, sizeof(waveFileName), L".WAV");
    }
    if (FAILED(hr))
    {
        printf("Unable to build output WAV file name: 0x%08lx\n", static_cast<unsigned long>(hr));
        return;
    }

    HANDLE waveHandle = CreateFile(waveFileName, GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS,
        FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
        NULL);
    if (waveHandle == INVALID_HANDLE_VALUE)
    {
        printf("Unable to open output WAV file %S: %lu\n", waveFileName, GetLastError());
        return;
    }

    if (WriteWaveFile(waveHandle, CaptureBuffer, BufferSize, WaveFormat))
    {
        printf("Successfully wrote WAVE data to %S\n", waveFileName);
    }
    else
    {
        printf("Unable to write wave file\n");
    }
    CloseHandle(waveHandle);
}

//
//  Reads the raw samples stored in "utterance.raw" and saves them again as a
//  WAV file, describing them as 2-channel, 44100 Hz, 32-bit IEEE float audio.
//  Returns 0 when the raw file was read, 1 when it could not be read.
//
int main()
{
    const char* rawPath = "utterance.raw"; // raw audio path

    FILE* rawFile = fopen(rawPath, "rb");
    if (rawFile == NULL)
    {
        printf("Unable to open raw audio file %s\n", rawPath);
        return 1;
    }

    //
    //  Read the whole file, however long it is, instead of assuming a fixed
    //  10 second capture; only the bytes actually read are written back out.
    //
    std::vector<BYTE> captureBuffer;
    bool readFailed = false;
    try
    {
        BYTE chunk[64 * 1024];
        size_t chunkBytes;
        while ((chunkBytes = fread(chunk, 1, sizeof(chunk), rawFile)) > 0)
        {
            captureBuffer.insert(captureBuffer.end(), chunk, chunk + chunkBytes);
        }
        readFailed = (ferror(rawFile) != 0);
    }
    catch (const std::bad_alloc&)
    {
        printf("Unable to allocate memory to hold %s\n", rawPath);
        fclose(rawFile);
        return 1;
    }
    fclose(rawFile);

    if (readFailed)
    {
        printf("Unable to read raw audio file %s\n", rawPath);
        return 1;
    }
    if (captureBuffer.empty())
    {
        printf("Raw audio file %s is empty\n", rawPath);
        return 1;
    }

    WAVEFORMATEX wavformat = {};
    wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
    wavformat.nChannels = 2;
    wavformat.nSamplesPerSec = 44100;
    wavformat.wBitsPerSample = 32;
    wavformat.nBlockAlign = static_cast<WORD>(wavformat.nChannels * wavformat.wBitsPerSample / 8);  // 8
    wavformat.nAvgBytesPerSec = wavformat.nSamplesPerSec * wavformat.nBlockAlign;                    // 352800
    //  A plain WAVEFORMATEX has no extension bytes behind it.  A non-zero
    //  cbSize would make WriteWaveFile read past the end of this variable.
    wavformat.cbSize = 0;

    SaveWaveData(captureBuffer.data(), captureBuffer.size(), &wavformat);
    return 0;
}
将原始音频文件(utterance.raw)的路径传给fopen,运行程序后即可在当前目录下得到名为“WASAPICaptureEventDriven-{GUID}.WAV”的wav文件。
补充:
我们也可以改用ofstream来保存采集到的原始音频数据,代码如下:
// Write the contents of a WAV file. We take as input the data to write and the format of that data. // bool WriteWaveFile(HANDLE FileHandle, const BYTE *Buffer, const size_t BufferSize, const WAVEFORMATEX *WaveFormat) { ofstream binaryFile("file.raw", ios::out | ios::binary); binaryFile.write((char*)Buffer, BufferSize); binaryFile.close(); ...
再使用ifstream读取该原始音频文件,并为其添加wav的头结构来重新生成wav文件。
ifstream infile("file.raw", std::ifstream::binary); // get size of file infile.seekg(0, infile.end); long size = infile.tellg(); infile.seekg(0); BYTE* captureBuffer = new (std::nothrow) BYTE[size]; infile.read((char*)captureBuffer, size); infile.close(); WAVEFORMATEX wavformat; wavformat.wFormatTag = WAVE_FORMAT_IEEE_FLOAT; wavformat.nChannels = 2; wavformat.nSamplesPerSec = 44100; wavformat.nAvgBytesPerSec = 352800; wavformat.nBlockAlign = 8; wavformat.wBitsPerSample = 32; wavformat.cbSize = 22; SaveWaveData(captureBuffer, size, &wavformat);