• A super-simple speech recognition program: a dictation application


    Reposted from: http://blog.csdn.net/yincheng01/article/details/3584655

    The CSpeechRecognition class wraps the handful of interfaces that must be called for speech recognition, so speech recognition programming with it is both convenient and concise.

    The CSpeechRecognition class is defined as follows:

    ///////////////////////////////////////////////////////////////
    // active speech engine
    #include <atlbase.h>
    extern CComModule _Module;
    #include <atlcom.h>
    #include <sapi.h>
    #include <sphelper.h>
    #include <spuihelp.h>

    ///////////////////////////////////////////////////////////////
    // speech message
    #define WM_SREVENT   (WM_USER + 102)

    class CSpeechRecognition
    {
    public:
        CSpeechRecognition();
        virtual ~CSpeechRecognition();

        // initialize
        BOOL Initialize(HWND hWnd = NULL, BOOL bIsShared = TRUE);
        void Destroy();

        // start and stop
        BOOL Start();
        BOOL Stop();
        BOOL IsDictationOn()
        {
            return m_bOnDictation;
        }

        // event handler
        void GetText(WCHAR **ppszCoMemText, ULONG ulStart = 0, ULONG nlCount = -1);

        // voice training
        HRESULT VoiceTraining(HWND hWndParent);

        // microphone setup
        HRESULT MicrophoneSetup(HWND hWndParent);

        // token list
        HRESULT InitTokenList(HWND hWnd, BOOL bIsComboBox = FALSE);

        // error string
        CString GetErrorString()
        {
            return m_sError;
        }

        // interfaces
        CComPtr<ISpRecognizer>  m_cpRecoEngine;        // SR engine
        CComPtr<ISpRecoContext> m_cpRecoCtxt;          // Recognition context for dictation
        CComPtr<ISpRecoGrammar> m_cpDictationGrammar;  // Dictation grammar

    private:
        CString m_sError;
        BOOL    m_bOnDictation;
    };

    The message WM_SREVENT defined here indicates a speech recognition event; it is delivered to the notification window specified when the class is initialized.

    The class defines three interface pointers, m_cpRecoEngine, m_cpRecoCtxt and m_cpDictationGrammar, which reference the three key interfaces of the speech recognition engine: ISpRecognizer, ISpRecoContext and ISpRecoGrammar.

    The Initialize function sets up the basic working environment of the speech recognition engine, including the engine itself, the recognition context, the grammar, the audio input and event notification:

    BOOL CSpeechRecognition::Initialize(HWND hWnd, BOOL bIsShared)
    {
        // COM library
        if (FAILED(CoInitialize(NULL)))
        {
            m_sError = _T("Error initializing COM");
            return FALSE;
        }

        // SR engine
        HRESULT hr = S_OK;
        if (bIsShared)
        {
            // Shared reco engine.
            // For a shared reco engine, the audio gets set up automatically
            hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
        }
        else
        {
            hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);
        }

        // RecoContext
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
        }

        // Set recognition notification for dictation
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoCtxt->SetNotifyWindowMessage(hWnd, WM_SREVENT, 0, 0);
        }

        if (SUCCEEDED(hr))
        {
            // Notify only when the engine has recognized something
            const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
            hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
        }

        // Create the default audio input object
        CComPtr<ISpAudio> cpAudio;
        hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);

        // Set the input for the engine
        hr = m_cpRecoEngine->SetInput(cpAudio, TRUE);
        hr = m_cpRecoEngine->SetRecoState( SPRST_ACTIVE );

        // Grammar
        if (SUCCEEDED(hr))
        {
            // Specifies that the grammar we want is a dictation grammar.
            // Initializes the grammar (m_cpDictationGrammar)
            hr = m_cpRecoCtxt->CreateGrammar(0, &m_cpDictationGrammar);
        }
        if (SUCCEEDED(hr))
        {
            hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);
        }
        if (SUCCEEDED(hr))
        {
            hr = m_cpDictationGrammar->SetDictationState( SPRS_ACTIVE );
        }
        if (FAILED(hr))
        {
            m_cpDictationGrammar.Release();
        }

        return (hr == S_OK);
    }
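
    For comparison, the bIsShared flag selects between the two SAPI engine types: a shared recognizer (CLSID_SpSharedRecognizer) is shared by all speech applications on the desktop and sets up its audio input automatically, while an in-process recognizer (CLSID_SpInprocRecognizer) is private to the calling process and must be given an audio input explicitly, which is why Initialize creates a default audio object. A minimal usage sketch for the in-process case, assuming a window handle hMyWnd that is not part of the original code:

    CSpeechRecognition sr;
    if (!sr.Initialize(hMyWnd, FALSE))        // FALSE selects the in-process engine
    {
        AfxMessageBox(sr.GetErrorString());   // report why initialization failed
    }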

     

    The cleanup function Destroy is called by the class destructor; it releases all of the interfaces the class holds:

    void CSpeechRecognition::Destroy()
    {
        if (m_cpDictationGrammar)
            m_cpDictationGrammar.Release();
        if (m_cpRecoCtxt)
            m_cpRecoCtxt.Release();
        if (m_cpRecoEngine)
            m_cpRecoEngine.Release();
        CoUninitialize();
    }
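
    The article does not list the constructor and destructor bodies. A plausible minimal pair, written here as an assumption consistent with how Destroy and m_bOnDictation are used, would be:

    CSpeechRecognition::CSpeechRecognition()
    {
        m_bOnDictation = FALSE;   // dictation is off until Start() succeeds
    }

    CSpeechRecognition::~CSpeechRecognition()
    {
        Destroy();                // release all interfaces and uninitialize COM
    }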

    The Start and Stop functions start and stop capturing and recognizing speech; both are implemented by calling the SetRecoState method of the engine interface:

    BOOL CSpeechRecognition::Start()
    {
        if (m_bOnDictation)
            return TRUE;

        HRESULT hr = m_cpRecoEngine->SetRecoState( SPRST_ACTIVE );
        if (FAILED(hr))
            return FALSE;

        m_bOnDictation = TRUE;
        return TRUE;
    }

    BOOL CSpeechRecognition::Stop()
    {
        if (!m_bOnDictation)
            return TRUE;

        HRESULT hr = m_cpRecoEngine->SetRecoState( SPRST_INACTIVE );
        if (FAILED(hr))
            return FALSE;

        m_bOnDictation = FALSE;
        return TRUE;
    }
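
    Start and Stop switch the whole engine on and off through SetRecoState. An alternative, sketched below as an assumption rather than part of the original class, is to pause only the dictation grammar with ISpRecoGrammar::SetDictationState; this leaves the engine itself running, which matters if other grammars are loaded on the same context:

    // Hypothetical pause/resume helpers working at the grammar level (not in the article)
    BOOL PauseDictation(CComPtr<ISpRecoGrammar> &cpGrammar)
    {
        return SUCCEEDED(cpGrammar->SetDictationState(SPRS_INACTIVE));
    }

    BOOL ResumeDictation(CComPtr<ISpRecoGrammar> &cpGrammar)
    {
        return SUCCEEDED(cpGrammar->SetDictationState(SPRS_ACTIVE));
    }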

    The GetText function is the key to retrieving the text recognized from speech; it should be called from the handler that responds to the recognition event/message. The string it returns through ppszCoMemText is allocated by SAPI (hence the CoMem prefix) and must be freed by the caller with CoTaskMemFree. Its code is shown below.

    void CSpeechRecognition::GetText(WCHAR **ppszCoMemText, ULONG ulStart, ULONG nlCount)
    {
        USES_CONVERSION;
        CSpEvent event;

        // Process all of the recognition events
        while (event.GetFrom(m_cpRecoCtxt) == S_OK)
        {
            switch (event.eEventId)
            {
                case SPEI_RECOGNITION:
                    // There may be multiple recognition results, so get all of them
                    {
                        HRESULT hr = S_OK;
                        if (nlCount == -1)
                            hr = event.RecoResult()->GetText(SP_GETWHOLEPHRASE,
                                     SP_GETWHOLEPHRASE, TRUE, ppszCoMemText, NULL);
                        else
                        {
                            ASSERT(nlCount > 0);
                            hr = event.RecoResult()->GetText(ulStart, nlCount, FALSE,
                                     ppszCoMemText, NULL);
                        }
                    }
                    break;
            }
        }
    }
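
    The ulStart and nlCount parameters are used only when part of the phrase is wanted; the default nlCount of -1 retrieves the whole phrase. A small sketch of the partial form, assuming sr is a CSpeechRecognition instance and a recognition event has just been delivered (the variable names are illustrative):

    WCHAR *pwzPartial = NULL;
    sr.GetText(&pwzPartial, 0, 2);        // fetch only the first two phrase elements
    if (pwzPartial != NULL)
    {
        // ... use the text ...
        ::CoTaskMemFree(pwzPartial);      // the string was allocated by SAPI
    }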

    The InitTokenList function calls SpInitTokenComboBox or SpInitTokenListBox to list the available speech recognizers (engine tokens) in a list box or combo box for display and selection:

    HRESULT CSpeechRecognition::InitTokenList(HWND hWnd, BOOL bIsComboBox)
    {
        if (bIsComboBox)
            return SpInitTokenComboBox(hWnd, SPCAT_RECOGNIZERS);
        else
            return SpInitTokenListBox(hWnd, SPCAT_RECOGNIZERS);
    }
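
    InitTokenList only displays the recognizer tokens; to actually switch the engine to another recognizer you would obtain an ISpObjectToken and pass it to ISpRecognizer::SetRecognizer. A hedged sketch using the standard SpEnumTokens helper follows; note that it simply takes the first enumerated token where real code would use the token the user selected, and that changing the recognizer of a shared engine affects every application using it:

    // Illustrative only: point the engine (the class's public m_cpRecoEngine member)
    // at the first recognizer token found in the SPCAT_RECOGNIZERS category
    CComPtr<IEnumSpObjectTokens> cpEnum;
    CComPtr<ISpObjectToken> cpToken;
    HRESULT hr = SpEnumTokens(SPCAT_RECOGNIZERS, NULL, NULL, &cpEnum);
    if (SUCCEEDED(hr))
        hr = cpEnum->Next(1, &cpToken, NULL);
    if (SUCCEEDED(hr))
        hr = m_cpRecoEngine->SetRecognizer(cpToken);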

    Speech recognition requires speech input, usually through a microphone. Before recognition starts, you should check that the microphone is positioned and configured properly so the recognition engine receives valid audio input. The MicrophoneSetup function calls the DisplayUI method of the recognition engine interface to show a microphone setup wizard, as shown in Figure 11-4. The sample code follows:

    HRESULT CSpeechRecognition::MicrophoneSetup(HWND hWndParent)
    {
        return m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_MicTraining, NULL, 0);
    }

     

    Voice training is an important foundation of speech recognition: to achieve the expected accuracy the engine must be trained so that it becomes familiar with the speaker's voice and accent. The VoiceTraining function calls the engine interface's DisplayUI method to show a voice training wizard, as shown in Figure 11-5. The sample code follows:

    HRESULT CSpeechRecognition::VoiceTraining(HWND hWndParent)
    {
        return m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_UserTraining, NULL, 0);
    }
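
    DisplayUI assumes the engine actually provides the requested UI component. A defensive variant, sketched here as an assumption rather than the article's code, first asks the engine via ISpRecognizer::IsUISupported:

    // Hypothetical wrapper that only shows the training wizard when it is available
    HRESULT ShowTrainingUIIfSupported(ISpRecognizer *pEngine, HWND hWndParent)
    {
        BOOL fSupported = FALSE;
        HRESULT hr = pEngine->IsUISupported(SPDUI_UserTraining, NULL, 0, &fSupported);
        if (SUCCEEDED(hr) && fSupported)
            hr = pEngine->DisplayUI(hWndParent, NULL, SPDUI_UserTraining, NULL, 0);
        return hr;
    }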

     

    CText2Speech类似,CSpeechRecognition类也提供错误处理机制,由GetErrorString函数可以获得错误信息。

    11.3.2  Example: Writing a dictation program with the CSpeechRecognition class

    Writing a speech recognition program with the CSpeechRecognition class is simple. Below we implement a dictation program, Stenotypist, whose interface is shown in Figure 11-6.

     

    VisualC++编制Stenotypist的步骤和要点如下:

    1) Use AppWizard to generate a dialog-based project named Stenotypist;
    2) Add SpeechRecognition.h and SpeechRecognition.cpp to the Stenotypist project;
    3) Lay out the required controls in the resource editor;
    4) Use ClassWizard to generate the corresponding member variables for the controls in the CStenotypistDlg class;
    5) Modify StenotypistDlg.h to add the necessary variables and functions to the CStenotypistDlg class;
    6) Use ClassWizard to add handler functions for the controls and messages to the CStenotypistDlg class. The code of StenotypistDlg.h is shown below.

    #include "SpeechRecognition.h"

    ////////////////////////////////////////////////////////////////////
    // CStenotypistDlg dialog

    class CStenotypistDlg : public CDialog
    {
    // Construction
    public:
        CStenotypistDlg(CWnd* pParent = NULL);   // standard constructor

    // Dialog Data
        //{{AFX_DATA(CStenotypistDlg)
        enum { IDD = IDD_STENOTYPIST_DIALOG };
        CButton    m_btDictation;
        CString    m_strText;
        //}}AFX_DATA

        // ClassWizard generated virtual function overrides
        //{{AFX_VIRTUAL(CStenotypistDlg)
        protected:
        virtual void DoDataExchange(CDataExchange* pDX);   // DDX/DDV support
        //}}AFX_VIRTUAL

        CSpeechRecognition   m_SpeechRecognition;

    // Implementation
    protected:
        HICON m_hIcon;

        // Generated message map functions
        //{{AFX_MSG(CStenotypistDlg)
        virtual BOOL OnInitDialog();
        afx_msg void OnSysCommand(UINT nID, LPARAM lParam);
        afx_msg void OnPaint();
        afx_msg HCURSOR OnQueryDragIcon();
        afx_msg void OnButtonVt();
        afx_msg void OnButtonMs();
        afx_msg void OnButtonDictate();
        //}}AFX_MSG
        afx_msg LRESULT OnSREvent(WPARAM, LPARAM);
        DECLARE_MESSAGE_MAP()
    };

    Note that a CSpeechRecognition object is defined as a member of the CStenotypistDlg class.

    OnInitDialog函数中调用CSpeechRecognition函数和设置语音语言列表:

    BOOL CStenotypistDlg::OnInitDialog()
    {
        CDialog::OnInitDialog();

        // Add "About..." menu item to system menu.

        // IDM_ABOUTBOX must be in the system command range.
        ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
        ASSERT(IDM_ABOUTBOX < 0xF000);

        CMenu* pSysMenu = GetSystemMenu(FALSE);
        if (pSysMenu != NULL)
        {
            CString strAboutMenu;
            strAboutMenu.LoadString(IDS_ABOUTBOX);
            if (!strAboutMenu.IsEmpty())
            {
                pSysMenu->AppendMenu(MF_SEPARATOR);
                pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
            }
        }

        // Set the icon for this dialog.  The framework does this automatically
        // when the application's main window is not a dialog
        SetIcon(m_hIcon, TRUE);     // Set big icon
        SetIcon(m_hIcon, FALSE);    // Set small icon

        // TODO: Add extra initialization here
        if (!m_SpeechRecognition.Initialize(m_hWnd))
            AfxMessageBox(m_SpeechRecognition.GetErrorString());
        m_SpeechRecognition.InitTokenList(GetDlgItem(IDC_LIST1)->m_hWnd);

        m_SpeechRecognition.Stop();

        return TRUE;  // return TRUE  unless you set the focus to a control
    }

    Starting and stopping dictation are simple to implement: just call the corresponding CSpeechRecognition member functions, as shown below. Note that the button toggles between stopping and starting.

    void CStenotypistDlg::OnButtonDictate()
    {
        if (m_SpeechRecognition.IsDictationOn())
        {
            m_SpeechRecognition.Stop();
            m_btDictation.SetWindowText("Dictate(&D)");

            SetWindowText("Stenotypist - press the <Dictate> button to start dictation!");
        }
        else
        {
            m_SpeechRecognition.Start();
            m_btDictation.SetWindowText("Stop(&S)");

            SetWindowText("Stenotypist - recording, please dictate...");
        }
    }

    Microphone setup and voice training are likewise implemented by directly calling the corresponding CSpeechRecognition member functions:

    void CStenotypistDlg::OnButtonVt()
    {
        m_SpeechRecognition.VoiceTraining(m_hWnd);
    }

    void CStenotypistDlg::OnButtonMs()
    {
        m_SpeechRecognition.MicrophoneSetup(m_hWnd);
    }

    To respond to the WM_SREVENT message, add the corresponding message-map entry and handler function:

    BEGIN_MESSAGE_MAP(CStenotypistDlg, CDialog)
        //{{AFX_MSG_MAP(CStenotypistDlg)
        ON_WM_SYSCOMMAND()
        ON_WM_PAINT()
        ON_WM_QUERYDRAGICON()
        ON_BN_CLICKED(IDC_BUTTON_VT, OnButtonVt)
        ON_BN_CLICKED(IDC_BUTTON_MS, OnButtonMs)
        ON_BN_CLICKED(IDC_BUTTON_DICTATE, OnButtonDictate)
        //}}AFX_MSG_MAP
        ON_MESSAGE(WM_SREVENT, OnSREvent)
    END_MESSAGE_MAP()

    LRESULT CStenotypistDlg::OnSREvent(WPARAM, LPARAM)
    {
        WCHAR *pwzText = NULL;
        m_SpeechRecognition.GetText(&pwzText);

        if (pwzText != NULL)
        {
            // Append the recognized text and refresh the edit control
            m_strText += CString(pwzText);
            UpdateData(FALSE);

            // The string was allocated by SAPI and must be freed by the caller
            ::CoTaskMemFree(pwzText);
        }

        return 0L;
    }

    7) To call the Speech engine, set up the corresponding include and lib settings in the Microsoft Visual C++ environment:

    • Set the include path

        - Open the Project Settings dialog via the Project→Settings menu item;
        - Click the C/C++ tab;
        - Select Preprocessor in the Category drop-down list;
        - In the "Additional include directories" edit box, enter the path of the Speech SDK include directory; the default path is C:/Program Files/Microsoft Speech SDK 5.1/Include.

    • Set the lib information
        - Open the Project Settings dialog via the Project→Settings menu item;
        - Click the Link tab;
        - Select Input in the Category drop-down list;
        - In the "Additional library path" edit box, enter the path of the Speech SDK lib directory; the default path is C:/Program Files/Microsoft Speech SDK 5.1/Lib/i386;
        - Add "sapi.lib" to the edit box labeled "Object/library modules" (a source-level alternative is sketched right after this list).
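
    Alternatively, instead of adding sapi.lib in the project settings, the import library can be requested from source with a linker pragma; this is a standard MSVC feature, shown here as a small sketch:

    // e.g. in stdafx.h or SpeechRecognition.cpp, after including sapi.h
    #pragma comment(lib, "sapi.lib")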

    8) Build and link the project, and the stenotypist is ready to take dictation.

    All source code of the Stenotypist project is stored in the /Source/Stenotypist directory of the companion disc.

  • Original post: https://www.cnblogs.com/ldjhust/p/3191550.html