Add non-streaming speech recognition examples for MFC (#212)

This commit is contained in:
Fangjun Kuang
2023-07-14 17:00:14 +08:00
committed by GitHub
parent bebc1f1398
commit 0abd7ce881
22 changed files with 1153 additions and 63 deletions

View File

@@ -98,6 +98,7 @@ jobs:
cd mfc-examples/$arch/Release cd mfc-examples/$arch/Release
cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
ls -lh ls -lh
- name: Upload artifact - name: Upload artifact
@@ -106,10 +107,24 @@ jobs:
name: streaming-speech-recognition-${{ matrix.arch }} name: streaming-speech-recognition-${{ matrix.arch }}
path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
- name: Release pre-compiled binaries and libs for macOS - name: Upload artifact
uses: actions/upload-artifact@v2
with:
name: non-streaming-speech-recognition-${{ matrix.arch }}
path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe
- name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
if: env.RELEASE == 'true' if: env.RELEASE == 'true'
uses: svenstaro/upload-release-action@v2 uses: svenstaro/upload-release-action@v2
with: with:
file_glob: true file_glob: true
overwrite: true overwrite: true
file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
- name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
if: env.RELEASE == 'true'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe

View File

@@ -113,7 +113,7 @@ function(download_onnxruntime)
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2") set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2")
set(onnxruntime_URL2 "") set(onnxruntime_URL2 "")
set(onnxruntime_HASH "SHA256=a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb") set(onnxruntime_HASH "SHA256=94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5")
endif() endif()
if(SHERPA_ONNX_ENABLE_GPU) if(SHERPA_ONNX_ENABLE_GPU)
@@ -161,7 +161,7 @@ function(download_onnxruntime)
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2") set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2")
set(onnxruntime_URL2 "") set(onnxruntime_URL2 "")
set(onnxruntime_HASH "SHA256=f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19") set(onnxruntime_HASH "SHA256=c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b")
endif() endif()
endif() endif()
# After downloading, it contains: # After downloading, it contains:

View File

@@ -0,0 +1,86 @@
// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
// application.
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "NonStreamingSpeechRecognitionDlg.h"
#include "NonStreamingSpeechRecognition.h"
// clang-format on
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// CNonStreamingSpeechRecognitionApp
// Message map of the application object: the only entry routes the ID_HELP
// command to the framework-provided CWinApp::OnHelp handler.
BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp)
ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
END_MESSAGE_MAP()
// CNonStreamingSpeechRecognitionApp construction
// The constructor is intentionally empty; the application is set up in
// InitInstance().
CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() {
// Nothing to do here.
// Place all significant initialization in InitInstance
}
// The one and only CNonStreamingSpeechRecognitionApp object.
// It has static storage duration and is constructed before InitInstance()
// is invoked on it.
CNonStreamingSpeechRecognitionApp theApp;
// CNonStreamingSpeechRecognitionApp initialization
//
// Runs the whole application as one modal dialog: sets up the MFC shell and
// visual managers, shows CNonStreamingSpeechRecognitionDlg, cleans up, and
// returns FALSE so that the framework exits instead of starting a message
// pump.
BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
  CWinApp::InitInstance();

  // The shell manager is only needed if the dialog hosts shell tree view or
  // shell list view controls; it is created unconditionally, as in the
  // wizard template.
  CShellManager *shell_manager = new CShellManager;

  // Use the "Windows Native" visual manager so MFC controls are themed.
  CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Registry key under which the application's settings are stored.
  // TODO: replace with something appropriate, such as the name of your
  // company or organization.
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CNonStreamingSpeechRecognitionDlg dlg;
  m_pMainWnd = &dlg;

  // IDOK and IDCANCEL need no special handling; only a creation failure
  // (-1) is reported.
  const INT_PTR dialog_result = dlg.DoModal();
  if (dialog_result == -1) {
    TRACE(traceAppMsg, 0,
          "Warning: dialog creation failed, so application is terminating "
          "unexpectedly.\n");
    TRACE(traceAppMsg, 0,
          "Warning: if you are using MFC controls on the dialog, you cannot "
          "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
  }

  // Release the shell manager created above (deleting nullptr is a no-op).
  delete shell_manager;

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // The dialog has been closed: returning FALSE exits the application
  // rather than starting the application's message pump.
  return FALSE;
}

View File

@@ -0,0 +1,31 @@
// NonStreamingSpeechRecognition.h : main header file for the PROJECT_NAME
// application
//
#pragma once
#ifndef __AFXWIN_H__
#error "include 'pch.h' before including this file for PCH"
#endif
#include "resource.h" // main symbols
// CNonStreamingSpeechRecognitionApp:
// Application object of the non-streaming speech recognition example.
// See NonStreamingSpeechRecognition.cpp for the implementation of this class
//
class CNonStreamingSpeechRecognitionApp : public CWinApp {
 public:
  CNonStreamingSpeechRecognitionApp();

  // Overrides
  // Shows the main dialog modally. `override` makes the compiler verify the
  // signature against CWinApp::InitInstance.
  BOOL InitInstance() override;

  // Implementation
  DECLARE_MESSAGE_MAP()
};
// Declaration of the single application object; it is defined in
// NonStreamingSpeechRecognition.cpp.
extern CNonStreamingSpeechRecognitionApp theApp;

View File

@@ -0,0 +1,219 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion>
<ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid>
<Keyword>MFCProj</Keyword>
<RootNamespace>NonStreamingSpeechRecognition</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Static</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Static</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Static</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Static</UseOfMfc>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="framework.h" />
<ClInclude Include="NonStreamingSpeechRecognition.h" />
<ClInclude Include="NonStreamingSpeechRecognitionDlg.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="Resource.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="NonStreamingSpeechRecognition.cpp" />
<ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" />
<ClCompile Include="pch.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="NonStreamingSpeechRecognition.rc" />
</ItemGroup>
<ItemGroup>
<None Include="res\NonStreamingSpeechRecognition.rc2" />
</ItemGroup>
<ItemGroup>
<Image Include="res\NonStreamingSpeechRecognition.ico" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="NonStreamingSpeechRecognition.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="framework.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="targetver.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Resource.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="NonStreamingSpeechRecognition.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="NonStreamingSpeechRecognition.rc">
<Filter>Resource Files</Filter>
</ResourceCompile>
</ItemGroup>
<ItemGroup>
<None Include="res\NonStreamingSpeechRecognition.rc2">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
<ItemGroup>
<Image Include="res\NonStreamingSpeechRecognition.ico">
<Filter>Resource Files</Filter>
</Image>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,473 @@
// NonStreamingSpeechRecognitionDlg.cpp : implementation file
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "afxdialogex.h"
#include "NonStreamingSpeechRecognition.h"
#include "NonStreamingSpeechRecognitionDlg.h"
// clang-format on
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// Initializes the PortAudio library. If initialization fails, the error text
// is written to stderr and the process exits with status -2.
Microphone::Microphone() {
  const PaError status = Pa_Initialize();
  if (status == paNoError) {
    return;
  }

  fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(status));
  exit(-2);
}
// Shuts the PortAudio library down. If termination fails, the error text is
// written to stderr and the process exits with status -2.
Microphone::~Microphone() {
  const PaError status = Pa_Terminate();
  if (status == paNoError) {
    return;
  }

  fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(status));
  exit(-2);
}
// Converts a UTF-8 encoded byte string to UTF-16 code units.
//
// see
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
//
// @param utf8  The input bytes.
// @return The decoded text; code points above U+FFFF are emitted as a
//         surrogate pair (two wchar_t values).
// @throws std::logic_error if the input has a stray continuation byte, a
//         lead byte above 0xF7, a truncated or malformed sequence, an encoded
//         surrogate (U+D800..U+DFFF) or a code point above U+10FFFF.
//
// Note: overlong encodings (e.g. 0xC0 0x80) are not rejected.
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::wstring utf16;
  // A UTF-16 string never has more code units than its UTF-8 form has bytes.
  utf16.reserve(utf8.size());

  size_t i = 0;
  while (i < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    unsigned long uni = 0;  // code point being assembled
    size_t todo = 0;        // number of continuation bytes still expected
    if (lead <= 0x7F) {
      uni = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error("not a UTF-8 string");
    } else if (lead <= 0xDF) {
      uni = lead & 0x1F;
      todo = 1;
    } else if (lead <= 0xEF) {
      uni = lead & 0x0F;
      todo = 2;
    } else if (lead <= 0xF7) {
      uni = lead & 0x07;
      todo = 3;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }

    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
      if (cont < 0x80 || cont > 0xBF) {
        throw std::logic_error("not a UTF-8 string");
      }
      uni = (uni << 6) + (cont & 0x3F);
    }

    if (uni >= 0xD800 && uni <= 0xDFFF) {
      throw std::logic_error("not a UTF-8 string");
    }
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");

    if (uni <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(uni);
    } else {
      // Split into a high/low surrogate pair.
      uni -= 0x10000;
      utf16 += static_cast<wchar_t>((uni >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((uni & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}
// Joins the recognition results into a single string for display.
//
// Each entry is rendered as "<index>: <text>" (index starting at 0) and
// entries are separated by "\r\n". An empty input yields an empty string.
static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;
  const char *sep = "";  // nothing before the first entry
  for (size_t i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }
  return os.str();
}
// CNonStreamingSpeechRecognitionDlg dialog
// Constructs the dialog from the IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG
// template and loads the IDR_MAINFRAME icon into m_hIcon.
//
// pParent: optional parent window, forwarded to CDialogEx.
CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
CWnd *pParent /*=nullptr*/)
: CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}
// Releases the offline recognizer, if one was created.
CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
  if (recognizer_ == nullptr) {
    return;
  }

  DestroyOfflineRecognizer(recognizer_);
  recognizer_ = nullptr;
}
// Binds the dialog controls to their member wrappers: the edit box
// IDC_EDIT1 to my_text_ and the IDOK button to my_btn_.
void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {
CDialogEx::DoDataExchange(pDX);
DDX_Control(pDX, IDC_EDIT1, my_text_);
DDX_Control(pDX, IDOK, my_btn_);
}
// Message map of the dialog: paint and drag-icon queries, plus clicks on the
// IDOK button, which are routed to OnBnClickedOk().
BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx)
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk)
END_MESSAGE_MAP()
// CNonStreamingSpeechRecognitionDlg message handlers

// Performs one-time dialog setup: installs the big and small icons and
// reports the default input device via InitMicrophone().
// Always returns TRUE.
BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() {
CDialogEx::OnInitDialog();
// Set the icon for this dialog. The framework does this automatically
// when the application's main window is not a dialog
SetIcon(m_hIcon, TRUE); // Set big icon
SetIcon(m_hIcon, FALSE); // Set small icon
// Show the selected input device, or disable the button if there is none.
InitMicrophone();
return TRUE; // return TRUE unless you set the focus to a control
}
// If you add a minimize button to your dialog, you will need the code below
// to draw the icon. For MFC applications using the document/view model,
// this is automatically done for you by the framework.
void CNonStreamingSpeechRecognitionDlg::OnPaint() {
if (IsIconic()) {
CPaintDC dc(this); // device context for painting
SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
0);
// Center icon in client rectangle
int cxIcon = GetSystemMetrics(SM_CXICON);
int cyIcon = GetSystemMetrics(SM_CYICON);
CRect rect;
GetClientRect(&rect);
int x = (rect.Width() - cxIcon + 1) / 2;
int y = (rect.Height() - cyIcon + 1) / 2;
// Draw the icon
dc.DrawIcon(x, y, m_hIcon);
} else {
CDialogEx::OnPaint();
}
}
// The system calls this function to obtain the cursor to display while the
// user drags the minimized window. Returns the dialog icon.
HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
return static_cast<HCURSOR>(m_hIcon);
}
// PortAudio stream callback used while recording.
//
// Appends the `frames_per_buffer` float samples found in `input_buffer` to
// the dialog's `samples_` buffer. `user_data` is the dialog that opened the
// stream (OnBnClickedOk() passes `this`).
//
// Returns paContinue while the dialog's `started_` flag is set and
// paComplete once it has been cleared.
//
// NOTE(review): presumably invoked on a PortAudio-owned thread, in which case
// `started_` and `samples_` are shared with the UI thread - confirm.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto *self = static_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);

  const float *first = static_cast<const float *>(input_buffer);
  const float *last = first + frames_per_buffer;
  self->samples_.insert(self->samples_.end(), first, last);

  if (self->started_) {
    return paContinue;
  }
  return paComplete;
}
void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
if (!recognizer_) {
AppendLineToMultilineEditCtrl("Creating recognizer...");
AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
InitRecognizer();
if (!recognizer_) {
// failed to create the recognizer
return;
}
AppendLineToMultilineEditCtrl("Recognizer created!");
}
if (!started_) {
samples_.clear();
started_ = true;
PaStreamParameters param;
param.device = Pa_GetDefaultInputDevice();
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
param.channelCount = 1;
param.sampleFormat = paFloat32;
param.suggestedLatency = info->defaultLowInputLatency;
param.hostApiSpecificStreamInfo = nullptr;
float sample_rate = config_.feat_config.sample_rate;
pa_stream_ = nullptr;
PaError err =
Pa_OpenStream(&pa_stream_, &param, nullptr, /* &outputParameters, */
sample_rate,
0, // frames per buffer
paClipOff, // we won't output out of range samples
// so don't bother clipping them
RecordCallback, this);
if (err != paNoError) {
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
Pa_GetErrorText(err));
my_btn_.EnableWindow(FALSE);
return;
}
err = Pa_StartStream(pa_stream_);
if (err != paNoError) {
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
Pa_GetErrorText(err));
my_btn_.EnableWindow(FALSE);
return;
}
AppendLineToMultilineEditCtrl(
"\r\nStarted! Please speak and click stop.\r\n");
my_btn_.SetWindowText(_T("Stop"));
} else {
started_ = false;
Pa_Sleep(200); // sleep for 200ms
if (pa_stream_) {
PaError err = Pa_CloseStream(pa_stream_);
if (err != paNoError) {
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
Pa_GetErrorText(err));
my_btn_.EnableWindow(FALSE);
return;
}
}
pa_stream_ = nullptr;
SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer_);
AcceptWaveformOffline(stream, config_.feat_config.sample_rate,
samples_.data(), samples_.size());
DecodeOfflineStream(recognizer_, stream);
SherpaOnnxOfflineRecognizerResult *r = GetOfflineStreamResult(stream);
results_.emplace_back(r->text);
auto str = Utf8ToUtf16(Cat(results_).c_str());
my_text_.SetWindowText(str.c_str());
my_text_.SetFocus();
my_text_.SetSel(-1);
DestroyOfflineRecognizerResult(r);
DestroyOfflineStream(stream);
// AfxMessageBox("Stopped", MB_OK);
my_btn_.SetWindowText(_T("Start"));
AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak");
}
}
// Reports the default audio input device in the edit control.
//
// If PortAudio has no default input device, a message is shown instead and
// the Start button is disabled, since recording is impossible.
void CNonStreamingSpeechRecognitionDlg::InitMicrophone() {
  const int default_device = Pa_GetDefaultInputDevice();
  if (default_device == paNoDevice) {
    AppendLineToMultilineEditCtrl("No default input device found!");
    my_btn_.EnableWindow(FALSE);
    return;
  }

  AppendLineToMultilineEditCtrl(std::string("Selected device ") +
                                Pa_GetDeviceInfo(default_device)->name);
}
// Returns true if `filename` can be opened for reading.
bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  return std::ifstream(filename).good();
}
// Disables the Start button and appends instructions to the edit control on
// how to download and rename the model files that the recognizer looks for
// in the current directory.
//
// NOTE(review): InitRecognizer() also requires ./tokens.txt for transducer
// models, but the transducer example below lists no download command for
// it - confirm the right URL and add it.
void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
  my_btn_.EnableWindow(FALSE);

  std::string msg =
      "\r\nPlease go to\r\n"
      "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
      "\r\n";
  msg += "to download a non-streaming model, i.e., an offline model.\r\n";
  msg +=
      "You need to rename them to encoder.onnx, decoder.onnx, and "
      "joiner.onnx correspondingly.\r\n\r\n";
  msg += "It supports both transducer models and paraformer models.\r\n\r\n";
  msg +=
      "We give two examples below to show you how to download models\r\n\r\n";

  // Example 1: a transducer model (encoder/decoder/joiner).
  msg += "(1) Transducer\r\n\r\n";
  msg +=
      "We use "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615 below\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "encoder-epoch-12-avg-4.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "decoder-epoch-12-avg-4.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "joiner-epoch-12-avg-4.onnx\r\n";
  msg += "\r\n Now rename them\r\n";
  msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n";
  msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n";
  msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n";

  // Example 2: a paraformer model (single model file plus tokens).
  msg += "(2) Paraformer\r\n\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/"
      "sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
      "resolve/main/tokens.txt\r\n\r\n";
  msg += "\r\n Now rename them\r\n";
  msg += "mv model.onnx paraformer.onnx\r\n";
  msg += "\r\n";
  msg += "That's it!\r\n";

  AppendLineToMultilineEditCtrl(msg);
}
// Creates the offline recognizer from a paraformer model in the current
// directory.
//
// Uses ./paraformer.int8.onnx if it exists, otherwise ./paraformer.onnx, plus
// ./tokens.txt. If a required file is missing, the missing files are listed,
// the download help is shown and recognizer_ is left unchanged.
//
// NOTE(review): model_config.debug is 1 here but 0 in InitRecognizer() -
// confirm whether that difference is intended.
void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
  // The paths are string literals (static storage duration) rather than
  // local std::strings: config_ is a member that outlives this call, so it
  // must not keep pointers into objects that die when the function returns.
  const char *paraformer = "./paraformer.onnx";
  const char *const tokens = "./tokens.txt";

  bool is_ok = true;

  if (Exists("./paraformer.int8.onnx")) {
    paraformer = "./paraformer.int8.onnx";
  } else if (!Exists(paraformer)) {
    AppendLineToMultilineEditCtrl(std::string(paraformer) +
                                  " does not exist!");
    is_ok = false;
  }

  if (!Exists(tokens)) {
    AppendLineToMultilineEditCtrl(std::string(tokens) + " does not exist!");
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.paraformer.model = paraformer;
  config_.model_config.tokens = tokens;
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 1;

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = CreateOfflineRecognizer(&config_);
}
// Creates the offline recognizer from model files in the current directory.
//
// If ./paraformer.onnx or ./paraformer.int8.onnx exists, the work is
// delegated to InitParaformer(). Otherwise a transducer model is assumed,
// which needs ./encoder.onnx, ./decoder.onnx, ./joiner.onnx and ./tokens.txt.
// If any of those is missing, every missing file is listed, the download
// help is shown and recognizer_ is left unchanged.
void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
  if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
    InitParaformer();
    return;
  }

  // assume it is transducer

  // The paths are string literals (static storage duration) rather than
  // local std::strings: config_ is a member that outlives this call, so it
  // must not keep pointers into objects that die when the function returns.
  const char *const encoder = "./encoder.onnx";
  const char *const decoder = "./decoder.onnx";
  const char *const joiner = "./joiner.onnx";
  const char *const tokens = "./tokens.txt";

  // Report every missing file, not just the first one.
  bool is_ok = true;
  for (const char *path : {encoder, decoder, joiner, tokens}) {
    if (!Exists(path)) {
      AppendLineToMultilineEditCtrl(std::string(path) + " does not exist!");
      is_ok = false;
    }
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.transducer.encoder = encoder;
  config_.model_config.transducer.decoder = decoder;
  config_.model_config.transducer.joiner = joiner;
  config_.model_config.tokens = tokens;
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 0;

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = CreateOfflineRecognizer(&config_);
}
// Appends the UTF-8 text `s` at the end of the edit control.
void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
    const std::string &s) {
  // Collapse the selection to the very end of the current text ...
  const int end_pos = my_text_.GetWindowTextLength();
  my_text_.SetSel(end_pos, end_pos);

  // ... so that replacing the (empty) selection inserts the new text there.
  const std::wstring wide_text = Utf8ToUtf16(s);
  my_text_.ReplaceSel(wide_text.c_str());
}
// Appends `s` to the edit control on a new line, i.e. preceded by "\r\n".
void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
    const std::string &s) {
  std::string line = "\r\n";
  line += s;
  AppendTextToEditCtrl(line);
}

View File

@@ -0,0 +1,73 @@
// NonStreamingSpeechRecognitionDlg.h : header file
//
#pragma once
#include <atomic>
#include <string>
#include <vector>
#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"
// RAII guard for the PortAudio library: the constructor calls
// Pa_Initialize() and the destructor calls Pa_Terminate() (see
// NonStreamingSpeechRecognitionDlg.cpp).
//
// Copying is disabled because every copy would run the destructor and
// terminate PortAudio again without a matching initialization.
class Microphone {
 public:
  Microphone();
  ~Microphone();

  Microphone(const Microphone &) = delete;
  Microphone &operator=(const Microphone &) = delete;
};
// CNonStreamingSpeechRecognitionDlg dialog
// Main dialog of the non-streaming speech recognition example.
//
// It owns the Microphone guard, a PortAudio stream handle, the offline
// recognizer together with its configuration, and the button and edit
// controls used by the UI.
class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
  // Construction
 public:
  CNonStreamingSpeechRecognitionDlg(
      CWnd *pParent = nullptr);  // standard constructor
  ~CNonStreamingSpeechRecognitionDlg();

  // Dialog Data
#ifdef AFX_DESIGN_TIME
  enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
#endif

 protected:
  virtual void DoDataExchange(CDataExchange *pDX);  // DDX/DDV support

  // Implementation
 protected:
  HICON m_hIcon;

  // Generated message map functions
  virtual BOOL OnInitDialog();
  afx_msg void OnPaint();
  afx_msg HCURSOR OnQueryDragIcon();
  DECLARE_MESSAGE_MAP()

 public:
  // Handler for the OK button.
  afx_msg void OnBnClickedOk();

  // NOTE(review): presumably the body of a worker thread; the definition is
  // not in this header - confirm in the implementation file.
  int RunThread();

 private:
  Microphone mic_;
  SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;

  // Value-initialized so that every field starts out as zero / null.
  // CreateOfflineRecognizer() substitutes its defaults for fields that are
  // zero or null, so fields that are never assigned must not hold
  // indeterminate values.
  SherpaOnnxOfflineRecognizerConfig config_{};

  PaStream *pa_stream_ = nullptr;
  CButton my_btn_;
  CEdit my_text_;
  std::vector<std::string> results_;

 public:
  // NOTE(review): these two members are public, presumably so that code
  // outside the class (e.g. an audio callback) can reach them - confirm.
  bool started_ = false;
  std::vector<float> samples_;

 private:
  // Helpers that write text into the edit control `my_text_`.
  void AppendTextToEditCtrl(const std::string &s);
  void AppendLineToMultilineEditCtrl(const std::string &s);

  void InitMicrophone();
  bool Exists(const std::string &filename);
  void InitRecognizer();
  void InitParaformer();
  void ShowInitRecognizerHelpMessage();
};

View File

@@ -0,0 +1,18 @@
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by NonStreamingSpeechRecognition.rc
//
// Resource ID of the main dialog; the dialog class uses it as its IDD.
#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
// NOTE(review): presumably the application icon - confirm in the .rc file.
#define IDR_MAINFRAME 128
// NOTE(review): presumably the edit control of the dialog - confirm in the
// .rc file.
#define IDC_EDIT1 1000
// Next default values for new objects
//
// These counters are only defined when APSTUDIO_INVOKED is set and
// APSTUDIO_READONLY_SYMBOLS is not.
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 130
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 1001
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif

View File

@@ -0,0 +1,26 @@
#pragma once
#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
#endif
#include "targetver.h"
#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be
// explicit
// turns off MFC's hiding of some common and often safely ignored warning
// messages
#define _AFX_ALL_WARNINGS
#include <afxext.h> // MFC extensions
#include <afxwin.h> // MFC core and standard components
#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h> // MFC support for Windows Common Controls
#endif // _AFX_NO_AFXCMN_SUPPORT
#include <afxcontrolbars.h> // MFC support for ribbons and control bars

View File

@@ -0,0 +1,6 @@
// pch.cpp: source file corresponding to the pre-compiled header
#include "pch.h"
// When you are using pre-compiled headers, this source file is necessary for
// compilation to succeed.

View File

@@ -0,0 +1,15 @@
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for
// future builds. This also affects IntelliSense performance, including code
// completion and many code browsing features. However, files listed here are
// ALL re-compiled if any one of them is updated between builds. Do not add
// files here that you will be updating frequently as this negates the
// performance advantage.
#ifndef PCH_H
#define PCH_H
// add headers that you want to pre-compile here
#include "framework.h"
#endif // PCH_H

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

View File

@@ -0,0 +1,50 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Property sheet that points a C++ project at a sherpa-onnx build tree.
It defines the build and install directories, the list of static libraries
to link, and the include / library search paths derived from them.
-->
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<!-- Both directories are relative paths; change them if sherpa-onnx was built elsewhere. -->
<SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
<SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
<!-- Semicolon-separated list of the libraries passed to the linker below. -->
<SherpaOnnxLibraries>
sherpa-onnx-portaudio_static.lib;
sherpa-onnx-c-api.lib;
sherpa-onnx-core.lib;
kaldi-native-fbank-core.lib;
absl_base.lib;
absl_city.lib;
absl_hash.lib;
absl_low_level_hash.lib;
absl_raw_hash_set.lib;
absl_raw_logging_internal.lib;
absl_throw_delegate.lib;
clog.lib;
cpuinfo.lib;
flatbuffers.lib;
libprotobuf-lite.lib;
onnx.lib;
onnx_proto.lib;
onnxruntime_common.lib;
onnxruntime_flatbuffers.lib;
onnxruntime_framework.lib;
onnxruntime_graph.lib;
onnxruntime_mlas.lib;
onnxruntime_optimizer.lib;
onnxruntime_providers.lib;
onnxruntime_session.lib;
onnxruntime_util.lib;
re2.lib;
</SherpaOnnxLibraries>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<!-- PortAudio headers come from the build tree; all other headers from the install tree. -->
<AdditionalIncludeDirectories>
$(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
$(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<!--
NOTE(review): unlike the directory lists above, this value does not append
%(AdditionalDependencies), so inherited dependencies are replaced rather
than extended. Confirm that this is intended.
-->
<AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>

View File

@@ -0,0 +1,9 @@
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform,
// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
// to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>

View File

@@ -3,11 +3,19 @@
This directory contains examples showing how to use Next-gen Kaldi in MFC This directory contains examples showing how to use Next-gen Kaldi in MFC
for speech recognition. for speech recognition.
Caution: You need to use Windows and install Visual Studio in order to run it. Caution: You need to use Windows and install Visual Studio 2022 in order to
compile it.
Hint: If you don't want to install Visual Studio, see the instructions below
on how to download a pre-compiled `exe`.
We use bash script below to demonstrate how to use it. Please change We use bash script below to demonstrate how to use it. Please change
the commands accordingly for Windows. the commands accordingly for Windows.
## Streaming speech recognition ## How to compile
First, we need to compile sherpa-onnx:
```bash ```bash
mkdir -p $HOME/open-source mkdir -p $HOME/open-source
@@ -19,7 +27,6 @@ mkdir build
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install .. cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
cmake --build . --config Release --target install cmake --build . --config Release --target install
cd ../mfc-examples cd ../mfc-examples
msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64 msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64
@@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6
# now run the program # now run the program
./x64/Release/StreamingSpeechRecognition.exe ./x64/Release/StreamingSpeechRecognition.exe
./x64/Release/NonStreamingSpeechRecognition.exe
``` ```
Note that we also need to download pre-trained models. Please If you don't want to compile the project by yourself, you can download
refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases
for a list of streaming models.
We use the following model for demonstration. For instance, you can use the following addresses:
```bash - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
cd $HOME/open-source/sherpa-onnx/mfc-examples/x64/Release - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
# now rename
mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx
mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx
mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx
# Now run it!
./StreamingSpeechRecognition.exe
```

View File

@@ -3,12 +3,14 @@
// application. // application.
// //
// clang-format off
#include "pch.h" #include "pch.h"
#include "framework.h" #include "framework.h"
// clang-format on
#include "StreamingSpeechRecognition.h" #include "StreamingSpeechRecognition.h"
#include "StreamingSpeechRecognitionDlg.h"
#include "StreamingSpeechRecognitionDlg.h"
#ifdef _DEBUG #ifdef _DEBUG
#define new DEBUG_NEW #define new DEBUG_NEW

View File

@@ -1,10 +1,11 @@
// StreamingSpeechRecognitionDlg.cpp : implementation file // StreamingSpeechRecognitionDlg.cpp : implementation file
// //
// clang-format off
#include "pch.h" #include "pch.h"
#include "framework.h" #include "framework.h"
#include "afxdialogex.h" #include "afxdialogex.h"
// clang-format on
#include "StreamingSpeechRecognitionDlg.h" #include "StreamingSpeechRecognitionDlg.h"
@@ -15,7 +16,6 @@
#include "StreamingSpeechRecognition.h" #include "StreamingSpeechRecognition.h"
#ifdef _DEBUG #ifdef _DEBUG
#define new DEBUG_NEW #define new DEBUG_NEW
#endif #endif
@@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() {
// exit(-1); // exit(-1);
AppendLineToMultilineEditCtrl("No default input device found!"); AppendLineToMultilineEditCtrl("No default input device found!");
my_btn_.EnableWindow(FALSE); my_btn_.EnableWindow(FALSE);
return;
} }
AppendLineToMultilineEditCtrl(std::string("Selected device ") + AppendLineToMultilineEditCtrl(std::string("Selected device ") +
Pa_GetDeviceInfo(default_device)->name); Pa_GetDeviceInfo(default_device)->name);
@@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() {
msg += "\r\n"; msg += "\r\n";
msg += "That's it!\r\n"; msg += "That's it!\r\n";
AppendLineToMultilineEditCtrl(msg); AppendLineToMultilineEditCtrl(msg);
return; return;
} }
@@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
// put the selection at the end of text // put the selection at the end of text
my_text_.SetSel(nLength, nLength); my_text_.SetSel(nLength, nLength);
// replace the selection // replace the selection
CString str;
str.Format(_T("%s"), s.c_str());
std::wstring wstr = Utf8ToUtf16(s); std::wstring wstr = Utf8ToUtf16(s);

View File

@@ -1,10 +1,12 @@
 
Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16 # Visual Studio Version 17
VisualStudioVersion = 16.0.32630.194 VisualStudioVersion = 17.6.33829.357
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64 Debug|x64 = Debug|x64
@@ -21,6 +23,14 @@ Global
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@@ -27,36 +27,38 @@ struct SherpaOnnxDisplay {
std::unique_ptr<sherpa_onnx::Display> impl; std::unique_ptr<sherpa_onnx::Display> impl;
}; };
// Yields `x` when it is non-zero / non-null and the fallback `y` otherwise.
// Every parameter is parenthesized so that arguments containing
// low-precedence operators expand as written by the caller. Note that `x` is
// evaluated twice when it is truthy, so avoid arguments whose side effects
// must happen exactly once.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))
SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
const SherpaOnnxOnlineRecognizerConfig *config) { const SherpaOnnxOnlineRecognizerConfig *config) {
sherpa_onnx::OnlineRecognizerConfig recognizer_config; sherpa_onnx::OnlineRecognizerConfig recognizer_config;
recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
recognizer_config.model_config.encoder_filename = recognizer_config.model_config.encoder_filename =
config->model_config.encoder; SHERPA_ONNX_OR(config->model_config.encoder, "");
recognizer_config.model_config.decoder_filename = recognizer_config.model_config.decoder_filename =
config->model_config.decoder; SHERPA_ONNX_OR(config->model_config.decoder, "");
recognizer_config.model_config.joiner_filename = config->model_config.joiner; recognizer_config.model_config.joiner_filename = SHERPA_ONNX_OR(config->model_config.joiner, "");
recognizer_config.model_config.tokens = config->model_config.tokens; recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
recognizer_config.model_config.num_threads = config->model_config.num_threads; recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
recognizer_config.model_config.provider = config->model_config.provider; recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu");
recognizer_config.model_config.debug = config->model_config.debug; recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
recognizer_config.decoding_method = config->decoding_method; recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
recognizer_config.max_active_paths = config->max_active_paths; recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
recognizer_config.enable_endpoint = config->enable_endpoint; recognizer_config.enable_endpoint = SHERPA_ONNX_OR(config->enable_endpoint, 0);
recognizer_config.endpoint_config.rule1.min_trailing_silence = recognizer_config.endpoint_config.rule1.min_trailing_silence =
config->rule1_min_trailing_silence; SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4);
recognizer_config.endpoint_config.rule2.min_trailing_silence = recognizer_config.endpoint_config.rule2.min_trailing_silence =
config->rule2_min_trailing_silence; SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2);
recognizer_config.endpoint_config.rule3.min_utterance_length = recognizer_config.endpoint_config.rule3.min_utterance_length =
config->rule3_min_utterance_length; SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20);
if (config->model_config.debug) { if (config->model_config.debug) {
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
@@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
const SherpaOnnxOfflineRecognizerConfig *config) { const SherpaOnnxOfflineRecognizerConfig *config) {
sherpa_onnx::OfflineRecognizerConfig recognizer_config; sherpa_onnx::OfflineRecognizerConfig recognizer_config;
recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
recognizer_config.model_config.transducer.encoder_filename = recognizer_config.model_config.transducer.encoder_filename =
config->model_config.transducer.encoder; SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
recognizer_config.model_config.transducer.decoder_filename = recognizer_config.model_config.transducer.decoder_filename =
config->model_config.transducer.decoder; SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
recognizer_config.model_config.transducer.joiner_filename = recognizer_config.model_config.transducer.joiner_filename =
config->model_config.transducer.joiner; SHERPA_ONNX_OR(config->model_config.transducer.joiner,"");
recognizer_config.model_config.paraformer.model = recognizer_config.model_config.paraformer.model =
config->model_config.paraformer.model; SHERPA_ONNX_OR(config->model_config.paraformer.model, "");
recognizer_config.model_config.nemo_ctc.model = recognizer_config.model_config.nemo_ctc.model =
config->model_config.nemo_ctc.model; SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
recognizer_config.model_config.tokens = config->model_config.tokens; recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
recognizer_config.model_config.num_threads = config->model_config.num_threads; recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
recognizer_config.model_config.debug = config->model_config.debug; recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
recognizer_config.lm_config.model = config->lm_config.model; recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, "");
recognizer_config.lm_config.scale = config->lm_config.scale; recognizer_config.lm_config.scale = SHERPA_ONNX_OR(config->lm_config.scale, 1.0);
recognizer_config.decoding_method = config->decoding_method; recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
recognizer_config.max_active_paths = config->max_active_paths; recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
if (config->model_config.debug) { if (config->model_config.debug) {
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());