I have recently been doing some speech-synthesis work and needed to play the synthesized PCM data back in real time. After searching around online and consulting the relevant MSDN documentation, I wrote the following class for playing a PCM data stream. Without further ado, here is the code:
#pragma once

#include <Windows.h>
#include <mmsystem.h>
#pragma comment(lib, "winmm.lib")

#define DEF_MAX_BUFFER_SIZE   (1024 * 16)
#define DEF_MAX_BUFFER_COUNT  16

class CPcmSpeaker
{
public:
    CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT);
    ~CPcmSpeaker();

    int init(int channels, int samplePerSec, int bitsPerSample);

    // Queue PCM audio data for playback
    int toSpeaker(const void *data, int len, int timeout = INFINITE);

    int clearPcmData();

private:
    typedef struct
    {
        WAVEHDR header;
        char   *data;
    } WaveHeadandData;

    int m_maxBufferSize;
    int m_maxBufferCnt;
    WaveHeadandData *m_headAndDatas;

    // The callback parameters must be DWORD_PTR so the 'this' pointer survives on 64-bit builds
    static void CALLBACK waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD_PTR dwInstance,
                                     DWORD_PTR dwParam1, DWORD_PTR dwParam2);

    int writeToWave(const void *data, int len);
    int pcmtoWave(const void *data, int len, int timeout = INFINITE);

    // Shared state
    WAVEFORMATEX     m_waveFormat;
    HWAVEOUT         m_hWaveOut;        // waveOut handle
    HANDLE           m_hBufferEvent;
    CRITICAL_SECTION m_BufferOpCriticalSection;
};
#include "PcmSpeaker.h" CPcmSpeaker::CPcmSpeaker(int bufferSize, int bufferCnt) { m_hWaveOut = NULL; m_hBufferEvent = CreateEvent(NULL, FALSE, FALSE, NULL); InitializeCriticalSection(&m_BufferOpCriticalSection); //申请内存 m_headAndDatas = new WaveHeadandData[bufferCnt]; for (int i = 0; i < bufferCnt; i++) { memset(&m_headAndDatas[i].header, 0, sizeof(WAVEHDR)); m_headAndDatas[i].header.dwFlags = WHDR_DONE; m_headAndDatas[i].data = new char[bufferSize]; } m_maxBufferSize = bufferSize; m_maxBufferCnt = bufferCnt; } CPcmSpeaker::~CPcmSpeaker() { //关闭Wave if (m_hWaveOut != NULL) { clearPcmData(); waveOutClose(m_hWaveOut); m_hWaveOut = NULL; } //关闭一些句柄 CloseHandle(m_hBufferEvent); //删除临界区 DeleteCriticalSection(&m_BufferOpCriticalSection); //释放内存 for (int i = 0; i < m_maxBufferCnt; i++) delete[] m_headAndDatas[i].data; delete[] m_headAndDatas; } int CPcmSpeaker::init(int channels, int samplePerSec, int bitsPerSample) { if (m_hWaveOut != NULL) { return 0;// 已经进行了初始化 } // 第一步: 获取waveformat信息 m_waveFormat.wFormatTag = WAVE_FORMAT_PCM; m_waveFormat.nChannels = channels; m_waveFormat.wBitsPerSample = bitsPerSample; m_waveFormat.nSamplesPerSec = samplePerSec; m_waveFormat.nBlockAlign = m_waveFormat.nChannels * m_waveFormat.wBitsPerSample / 8; m_waveFormat.nAvgBytesPerSec = m_waveFormat.nSamplesPerSec * m_waveFormat.nBlockAlign; m_waveFormat.cbSize = sizeof(m_waveFormat); MMRESULT ret = waveOutOpen(NULL, WAVE_MAPPER, &m_waveFormat, NULL, NULL, WAVE_FORMAT_QUERY); if (MMSYSERR_NOERROR != ret) { return -1; } // 第二步: 获取WAVEOUT句柄 ret = waveOutOpen(&m_hWaveOut, WAVE_MAPPER, &m_waveFormat, (DWORD_PTR)waveOutProc, (DWORD_PTR)this, CALLBACK_FUNCTION); if (MMSYSERR_NOERROR != ret) { return -1; } return 0; } void CALLBACK CPcmSpeaker::waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2) { CPcmSpeaker *render = (CPcmSpeaker *)dwInstance; //WAVEHDR *header = (WAVEHDR *)dwParam1; int i = 0; switch (uMsg) { case WOM_DONE: EnterCriticalSection(&render->m_BufferOpCriticalSection); SetEvent(render->m_hBufferEvent); LeaveCriticalSection(&render->m_BufferOpCriticalSection); break; case WOM_CLOSE: i = 1; break; case WOM_OPEN: i = 2; break; } } int CPcmSpeaker::clearPcmData() { if (m_hWaveOut != NULL) { EnterCriticalSection(&m_BufferOpCriticalSection); for (int i = 0; i < m_maxBufferCnt; i++) { if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR)); } waveOutReset(m_hWaveOut); LeaveCriticalSection(&m_BufferOpCriticalSection); } return 0; } int CPcmSpeaker::writeToWave(const void *data, int len) { MMRESULT mmres; int i; EnterCriticalSection(&m_BufferOpCriticalSection); for (i = 0; i < m_maxBufferCnt; i++) if (m_headAndDatas[i].header.dwFlags & WHDR_DONE) { //查看是否需要释放之前已经Prepared资源 if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR)); //写入新的数据到音频缓冲区 memcpy(m_headAndDatas[i].data, data, len); m_headAndDatas[i].header.lpData = m_headAndDatas[i].data; m_headAndDatas[i].header.dwBufferLength = len; m_headAndDatas[i].header.dwFlags = 0; mmres = waveOutPrepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR)); if (MMSYSERR_NOERROR == mmres) mmres = waveOutWrite(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR)); break; } LeaveCriticalSection(&m_BufferOpCriticalSection); if (i == m_maxBufferCnt) return -2; return (mmres == MMSYSERR_NOERROR) ? 
0 : -1; } //添加PCM音频数据,等待播放 int CPcmSpeaker::pcmtoWave(const void *data, int len, int timeout) { int res; if (len > m_maxBufferSize) return -1; res = writeToWave(data, len); //缓冲区已满,需要等待 if (res == -2) { if (WAIT_OBJECT_0 == WaitForSingleObject(m_hBufferEvent, timeout)) res = writeToWave(data, len); } return res; } int CPcmSpeaker::toSpeaker(const void *data, int len, int timeout) { int res; int n, l, ptr; //对大数据做分段处理 n = len / m_maxBufferSize; l = len % m_maxBufferSize; ptr = 0; for (int i = 0; i < n; i++) { res = pcmtoWave(((char *)data) + ptr, m_maxBufferSize, timeout); ptr += m_maxBufferSize; if (res != 0) return -1; } return pcmtoWave(((char *)data) + ptr, l, timeout); }
Usage is very simple:
CPcmSpeaker ps;
ps.init(1, 16000, 16);
The three parameters are the channel count, the sample rate, and the bits per sample.

ps.toSpeaker(data, data_len);
The two parameters are a pointer to the PCM data and its length in bytes.
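Putting it together, here is a minimal sketch of feeding the class from a headerless 16 kHz / 16-bit / mono PCM file. The file name, the 100 ms chunk size, and the trailing Sleep (a crude stand-in for a proper drain) are assumptions made purely for illustration, not part of the class itself:

#include <cstdio>
#include <vector>
#include "PcmSpeaker.h"

int main()
{
    CPcmSpeaker ps;
    if (ps.init(1, 16000, 16) != 0)         // channels, sample rate, bits per sample
        return -1;

    FILE *fp = fopen("test_16k_mono.pcm", "rb");   // hypothetical raw PCM file
    if (fp == NULL)
        return -1;

    std::vector<char> chunk(3200);          // 100 ms of 16 kHz / 16-bit / mono audio
    size_t n;
    while ((n = fread(chunk.data(), 1, chunk.size(), fp)) > 0)
        ps.toSpeaker(chunk.data(), (int)n); // blocks while the internal buffers are full

    fclose(fp);
    Sleep(200);   // give the last queued buffers a moment to drain before destruction
    return 0;
}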
Microsoft's Waveform (waveOut) API feels fairly low-level, and a few points deserve attention when using it:

Do not call any Waveform-related function from inside waveOutProc. The original MSDN wording is: "Applications should not call any system-defined functions from inside a callback function, except for EnterCriticalSection, LeaveCriticalSection, midiOutLongMsg, midiOutShortMsg, OutputDebugString, PostMessage, PostThreadMessage, SetEvent, timeGetSystemTime, timeGetTime, timeKillEvent, and timeSetEvent. Calling other wave functions will cause deadlock."
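This is why the callback above only signals an event and defers all waveOutUnprepareHeader / waveOutWrite work to the writer thread. A reduced sketch of the rule (the global event here is hypothetical, created elsewhere with CreateEvent):

static HANDLE g_bufferDone;   // hypothetical auto-reset event owned by the playback code

static void CALLBACK safeWaveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD_PTR dwInstance,
                                     DWORD_PTR dwParam1, DWORD_PTR dwParam2)
{
    if (uMsg == WOM_DONE)
        SetEvent(g_bufferDone);   // SetEvent is on MSDN's allowed list
    // Never call waveOutUnprepareHeader, waveOutWrite, etc. here; do that on the writer thread.
}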
The CPcmSpeaker constructor is declared as CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT). Its two defaulted parameters are the maximum size of each chunk written to the system audio buffers and the number of CPcmSpeaker's own internal buffers. Do not make bufferCnt too small (it should be greater than 2; pick a value that suits your workload, with 32 being a reasonable choice), otherwise playback will stutter.
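For example, to use 8 KB chunks and 32 internal buffers (values picked here only for illustration, suited to a bursty producer such as a TTS engine):

CPcmSpeaker ps(1024 * 8, 32);   // 8 KB per chunk, 32 internal buffers
ps.init(1, 16000, 16);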
The toSpeaker function takes a third parameter, timeout (defaulting to INFINITE), which is the call's timeout. In other words, the function is blocking to a degree: if data is written faster than the system can play it and CPcmSpeaker's internal buffers fill up, toSpeaker waits until a buffer finishes playing and space is freed before returning. If you do not want to wait forever, pass a finite timeout and the call will also return once it expires.
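For instance, if dropping a chunk is preferable to stalling the caller, pass a finite timeout and check the return value; the 200 ms used below is an arbitrary choice:

if (ps.toSpeaker(data, data_len, 200) != 0)
{
    // The internal buffers stayed full past the timeout (or the write failed);
    // the caller can skip this chunk or retry later instead of blocking indefinitely.
}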