Add non-streaming speech recognition examples for MFC (#212)
This commit is contained in:
19
.github/workflows/mfc.yaml
vendored
19
.github/workflows/mfc.yaml
vendored
@@ -98,6 +98,7 @@ jobs:
|
|||||||
|
|
||||||
cd mfc-examples/$arch/Release
|
cd mfc-examples/$arch/Release
|
||||||
cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
|
cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
|
||||||
|
cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
@@ -106,10 +107,24 @@ jobs:
|
|||||||
name: streaming-speech-recognition-${{ matrix.arch }}
|
name: streaming-speech-recognition-${{ matrix.arch }}
|
||||||
path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
|
path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
|
||||||
|
|
||||||
- name: Release pre-compiled binaries and libs for macOS
|
- name: Upload artifact
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: non-streaming-speech-recognition-${{ matrix.arch }}
|
||||||
|
path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe
|
||||||
|
|
||||||
|
- name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
|
||||||
if: env.RELEASE == 'true'
|
if: env.RELEASE == 'true'
|
||||||
uses: svenstaro/upload-release-action@v2
|
uses: svenstaro/upload-release-action@v2
|
||||||
with:
|
with:
|
||||||
file_glob: true
|
file_glob: true
|
||||||
overwrite: true
|
overwrite: true
|
||||||
file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe
|
file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
|
||||||
|
|
||||||
|
- name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
|
||||||
|
if: env.RELEASE == 'true'
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
overwrite: true
|
||||||
|
file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ function(download_onnxruntime)
|
|||||||
|
|
||||||
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2")
|
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2")
|
||||||
set(onnxruntime_URL2 "")
|
set(onnxruntime_URL2 "")
|
||||||
set(onnxruntime_HASH "SHA256=a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb")
|
set(onnxruntime_HASH "SHA256=94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_GPU)
|
if(SHERPA_ONNX_ENABLE_GPU)
|
||||||
@@ -161,7 +161,7 @@ function(download_onnxruntime)
|
|||||||
|
|
||||||
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2")
|
set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2")
|
||||||
set(onnxruntime_URL2 "")
|
set(onnxruntime_URL2 "")
|
||||||
set(onnxruntime_HASH "SHA256=f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19")
|
set(onnxruntime_HASH "SHA256=c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
# After downloading, it contains:
|
# After downloading, it contains:
|
||||||
|
|||||||
@@ -0,0 +1,86 @@
|
|||||||
|
|
||||||
|
// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
|
||||||
|
// application.
|
||||||
|
//
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
#include "pch.h"
|
||||||
|
#include "framework.h"
|
||||||
|
#include "NonStreamingSpeechRecognitionDlg.h"
|
||||||
|
#include "NonStreamingSpeechRecognition.h"
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#ifdef _DEBUG
|
||||||
|
#define new DEBUG_NEW
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionApp
|
||||||
|
|
||||||
|
BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp)
|
||||||
|
ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
|
||||||
|
END_MESSAGE_MAP()
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionApp construction
|
||||||
|
|
||||||
|
// Builds the application object. The body is intentionally empty: every piece
// of significant start-up work belongs in InitInstance().
CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() {
  // TODO: add construction code here if it is ever needed.
}
|
||||||
|
|
||||||
|
// The one and only CNonStreamingSpeechRecognitionApp object
|
||||||
|
|
||||||
|
CNonStreamingSpeechRecognitionApp theApp;
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionApp initialization
|
||||||
|
|
||||||
|
// Application start-up: shows the main dialog modally and, once it has been
// dismissed, releases what was created here.
//
// Always returns FALSE so that the application exits instead of entering the
// message pump.
BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
  CWinApp::InitInstance();

  // The shell manager is needed in case the dialog hosts shell tree view or
  // shell list view controls.
  CShellManager *shell_manager = new CShellManager;

  // Select the "Windows Native" visual manager so MFC controls are themed.
  CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Registry key under which the settings are stored.
  // TODO: replace this string with something appropriate, such as the name of
  // your company or organization.
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CNonStreamingSpeechRecognitionDlg main_dialog;
  m_pMainWnd = &main_dialog;

  const INT_PTR response = main_dialog.DoModal();
  switch (response) {
    case IDOK:
      // TODO: handle the dialog being dismissed with OK.
      break;

    case IDCANCEL:
      // TODO: handle the dialog being dismissed with Cancel.
      break;

    case -1:
      TRACE(traceAppMsg, 0,
            "Warning: dialog creation failed, so application is terminating "
            "unexpectedly.\n");
      TRACE(traceAppMsg, 0,
            "Warning: if you are using MFC controls on the dialog, you cannot "
            "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
      break;

    default:
      break;
  }

  // Release the shell manager created above; deleting a null pointer is a
  // no-op, so no explicit check is required.
  delete shell_manager;

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // The dialog is closed at this point: FALSE makes the application exit
  // rather than start its message pump.
  return FALSE;
}
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
// NonStreamingSpeechRecognition.h : main header file for the NonStreamingSpeechRecognition
|
||||||
|
// application
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef __AFXWIN_H__
|
||||||
|
#error "include 'pch.h' before including this file for PCH"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "resource.h" // main symbols
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionApp:
|
||||||
|
// See NonStreamingSpeechRecognition.cpp for the implementation of this class
|
||||||
|
//
|
||||||
|
|
||||||
|
class CNonStreamingSpeechRecognitionApp : public CWinApp {
|
||||||
|
public:
|
||||||
|
CNonStreamingSpeechRecognitionApp();
|
||||||
|
|
||||||
|
// Overrides
|
||||||
|
public:
|
||||||
|
virtual BOOL InitInstance();
|
||||||
|
|
||||||
|
// Implementation
|
||||||
|
|
||||||
|
DECLARE_MESSAGE_MAP()
|
||||||
|
};
|
||||||
|
|
||||||
|
extern CNonStreamingSpeechRecognitionApp theApp;
|
||||||
Binary file not shown.
@@ -0,0 +1,219 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Debug|x64">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|x64">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>x64</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<VCProjectVersion>17.0</VCProjectVersion>
|
||||||
|
<ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid>
|
||||||
|
<Keyword>MFCProj</Keyword>
|
||||||
|
<RootNamespace>NonStreamingSpeechRecognition</RootNamespace>
|
||||||
|
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
<UseOfMfc>Static</UseOfMfc>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
<UseOfMfc>Static</UseOfMfc>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
<UseOfMfc>Static</UseOfMfc>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v143</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>Unicode</CharacterSet>
|
||||||
|
<UseOfMfc>Static</UseOfMfc>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="Shared">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
<Import Project="sherpa-onnx-deps.props" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
<Import Project="sherpa-onnx-deps.props" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
<Import Project="sherpa-onnx-deps.props" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
<Import Project="sherpa-onnx-deps.props" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<LinkIncremental>true</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<LinkIncremental>false</LinkIncremental>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>Use</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Windows</SubSystem>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
<Midl>
|
||||||
|
<MkTypLibCompatible>false</MkTypLibCompatible>
|
||||||
|
<ValidateAllParameters>true</ValidateAllParameters>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
</Midl>
|
||||||
|
<ResourceCompile>
|
||||||
|
<Culture>0x0409</Culture>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ResourceCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>Use</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Windows</SubSystem>
|
||||||
|
</Link>
|
||||||
|
<Midl>
|
||||||
|
<MkTypLibCompatible>false</MkTypLibCompatible>
|
||||||
|
<ValidateAllParameters>true</ValidateAllParameters>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
</Midl>
|
||||||
|
<ResourceCompile>
|
||||||
|
<Culture>0x0409</Culture>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ResourceCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>Use</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Windows</SubSystem>
|
||||||
|
</Link>
|
||||||
|
<Midl>
|
||||||
|
<MkTypLibCompatible>false</MkTypLibCompatible>
|
||||||
|
<ValidateAllParameters>true</ValidateAllParameters>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
</Midl>
|
||||||
|
<ResourceCompile>
|
||||||
|
<Culture>0x0409</Culture>
|
||||||
|
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ResourceCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<PrecompiledHeader>Use</PrecompiledHeader>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<SubSystem>Windows</SubSystem>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
</Link>
|
||||||
|
<Midl>
|
||||||
|
<MkTypLibCompatible>false</MkTypLibCompatible>
|
||||||
|
<ValidateAllParameters>true</ValidateAllParameters>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
</Midl>
|
||||||
|
<ResourceCompile>
|
||||||
|
<Culture>0x0409</Culture>
|
||||||
|
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ResourceCompile>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="framework.h" />
|
||||||
|
<ClInclude Include="NonStreamingSpeechRecognition.h" />
|
||||||
|
<ClInclude Include="NonStreamingSpeechRecognitionDlg.h" />
|
||||||
|
<ClInclude Include="pch.h" />
|
||||||
|
<ClInclude Include="Resource.h" />
|
||||||
|
<ClInclude Include="targetver.h" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="NonStreamingSpeechRecognition.cpp" />
|
||||||
|
<ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" />
|
||||||
|
<ClCompile Include="pch.cpp">
|
||||||
|
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
|
||||||
|
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
|
||||||
|
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
|
||||||
|
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ResourceCompile Include="NonStreamingSpeechRecognition.rc" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Include="res\NonStreamingSpeechRecognition.rc2" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Image Include="res\NonStreamingSpeechRecognition.ico" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Filter Include="Source Files">
|
||||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||||
|
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files">
|
||||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
|
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Resource Files">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||||
|
</Filter>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="NonStreamingSpeechRecognition.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="framework.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="targetver.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Resource.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="pch.h">
|
||||||
|
<Filter>Header Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="NonStreamingSpeechRecognition.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="pch.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ResourceCompile Include="NonStreamingSpeechRecognition.rc">
|
||||||
|
<Filter>Resource Files</Filter>
|
||||||
|
</ResourceCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<None Include="res\NonStreamingSpeechRecognition.rc2">
|
||||||
|
<Filter>Resource Files</Filter>
|
||||||
|
</None>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<Image Include="res\NonStreamingSpeechRecognition.ico">
|
||||||
|
<Filter>Resource Files</Filter>
|
||||||
|
</Image>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
@@ -0,0 +1,473 @@
|
|||||||
|
|
||||||
|
// NonStreamingSpeechRecognitionDlg.cpp : implementation file
|
||||||
|
//
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
#include "pch.h"
|
||||||
|
#include "framework.h"
|
||||||
|
#include "afxdialogex.h"
|
||||||
|
#include "NonStreamingSpeechRecognition.h"
|
||||||
|
#include "NonStreamingSpeechRecognitionDlg.h"
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifdef _DEBUG
|
||||||
|
#define new DEBUG_NEW
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Initializes the PortAudio library. If initialization fails, the PortAudio
// error text is written to stderr and the process exits with status -2.
Microphone::Microphone() {
  const PaError status = Pa_Initialize();
  if (status == paNoError) {
    return;
  }

  fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(status));
  exit(-2);
}
|
||||||
|
|
||||||
|
// Shuts the PortAudio library down.
//
// A failure is only reported on stderr. The destructor deliberately does not
// call exit(): terminating the process from inside a destructor would skip
// whatever cleanup the owner of this object still has to perform.
Microphone::~Microphone() {
  const PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  }
}
|
||||||
|
|
||||||
|
// see
|
||||||
|
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
|
||||||
|
// Converts a UTF-8 encoded string into UTF-16 code units.
//
// Code points above U+FFFF are always emitted as a surrogate pair (two
// wchar_t elements), regardless of sizeof(wchar_t).
//
// @param utf8  Input bytes; must be well-formed UTF-8.
// @return      The UTF-16 code units of the input.
// @throws std::logic_error if the input is not well-formed UTF-8: a stray
//         continuation byte, an invalid lead byte, a truncated or overlong
//         sequence, an encoded surrogate, or a code point above U+10FFFF.
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::wstring utf16;
  // UTF-16 never needs more code units than UTF-8 needs bytes.
  utf16.reserve(utf8.size());

  size_t pos = 0;
  while (pos < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[pos++]);

    unsigned long uni = 0;        // code point being decoded
    size_t todo = 0;              // continuation bytes still expected
    unsigned long min_value = 0;  // smallest code point valid for this length

    if (lead <= 0x7F) {
      uni = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error("not a UTF-8 string");
    } else if (lead <= 0xDF) {
      uni = lead & 0x1F;
      todo = 1;
      min_value = 0x80;
    } else if (lead <= 0xEF) {
      uni = lead & 0x0F;
      todo = 2;
      min_value = 0x800;
    } else if (lead <= 0xF7) {
      uni = lead & 0x07;
      todo = 3;
      min_value = 0x10000;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }

    for (size_t j = 0; j < todo; ++j) {
      if (pos == utf8.size()) throw std::logic_error("not a UTF-8 string");
      const unsigned char cont = static_cast<unsigned char>(utf8[pos++]);
      if (cont < 0x80 || cont > 0xBF) {
        throw std::logic_error("not a UTF-8 string");
      }
      uni = (uni << 6) | (cont & 0x3F);
    }

    // Reject overlong forms (a code point encoded with more bytes than
    // necessary); well-formed UTF-8 only permits the shortest encoding.
    if (uni < min_value) throw std::logic_error("not a UTF-8 string");
    if (uni >= 0xD800 && uni <= 0xDFFF) {
      throw std::logic_error("not a UTF-8 string");
    }
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");

    if (uni <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(uni);
    } else {
      // Split into a high/low surrogate pair.
      uni -= 0x10000;
      utf16 += static_cast<wchar_t>((uni >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((uni & 0x3FF) + 0xDC00);
    }
  }

  return utf16;
}
|
||||||
|
|
||||||
|
// Joins the recognition results into a single string for display.
//
// Every entry becomes one line of the form "<index>: <text>", with indices
// starting at 0; lines are separated by "\r\n" and there is no trailing
// separator. An empty input yields an empty string.
static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;
  const char *sep = "";

  // size_t index: avoids comparing a signed int against results.size().
  for (size_t i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }

  return os.str();
}
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionDlg dialog
|
||||||
|
|
||||||
|
// Creates the dialog from the IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG
// resource and loads the IDR_MAINFRAME icon used for the window.
CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
    CWnd *pParent /*=nullptr*/)
    : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
  auto *app = AfxGetApp();
  m_hIcon = app->LoadIcon(IDR_MAINFRAME);
}
|
||||||
|
|
||||||
|
// Destroys the offline recognizer, if one was created, and clears the handle.
CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
  if (!recognizer_) {
    return;  // nothing was created, nothing to release
  }

  DestroyOfflineRecognizer(recognizer_);
  recognizer_ = nullptr;
}
|
||||||
|
|
||||||
|
// Binds dialog controls to their member objects after running the base-class
// exchange: IDC_EDIT1 is attached to my_text_ and IDOK to my_btn_.
void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {
  CDialogEx::DoDataExchange(pDX);

  DDX_Control(pDX, IDC_EDIT1, my_text_);  // edit control
  DDX_Control(pDX, IDOK, my_btn_);        // OK button
}
|
||||||
|
|
||||||
|
BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx)
|
||||||
|
ON_WM_PAINT()
|
||||||
|
ON_WM_QUERYDRAGICON()
|
||||||
|
ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk)
|
||||||
|
END_MESSAGE_MAP()
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionDlg message handlers
|
||||||
|
|
||||||
|
// Dialog initialization: runs the base-class initialization, installs the
// window icons and calls InitMicrophone().
//
// Returns TRUE, i.e. the focus has not been set to a control here.
BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() {
  CDialogEx::OnInitDialog();

  // The framework only sets the icon automatically when the main window is
  // not a dialog, so both sizes are installed explicitly.
  SetIcon(m_hIcon, TRUE);   // big icon
  SetIcon(m_hIcon, FALSE);  // small icon

  // Extra initialization for this dialog.
  InitMicrophone();

  return TRUE;
}
|
||||||
|
|
||||||
|
// If you add a minimize button to your dialog, you will need the code below
|
||||||
|
// to draw the icon. For MFC applications using the document/view model,
|
||||||
|
// this is automatically done for you by the framework.
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::OnPaint() {
|
||||||
|
if (IsIconic()) {
|
||||||
|
CPaintDC dc(this); // device context for painting
|
||||||
|
|
||||||
|
SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
|
||||||
|
0);
|
||||||
|
|
||||||
|
// Center icon in client rectangle
|
||||||
|
int cxIcon = GetSystemMetrics(SM_CXICON);
|
||||||
|
int cyIcon = GetSystemMetrics(SM_CYICON);
|
||||||
|
CRect rect;
|
||||||
|
GetClientRect(&rect);
|
||||||
|
int x = (rect.Width() - cxIcon + 1) / 2;
|
||||||
|
int y = (rect.Height() - cyIcon + 1) / 2;
|
||||||
|
|
||||||
|
// Draw the icon
|
||||||
|
dc.DrawIcon(x, y, m_hIcon);
|
||||||
|
} else {
|
||||||
|
CDialogEx::OnPaint();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The system calls this function to obtain the cursor to display while the user
|
||||||
|
// drags
|
||||||
|
// the minimized window.
|
||||||
|
// Returns the dialog icon as the cursor to show while the minimized window is
// being dragged.
HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
  HCURSOR drag_cursor = static_cast<HCURSOR>(m_hIcon);
  return drag_cursor;
}
|
||||||
|
|
||||||
|
static int32_t RecordCallback(const void *input_buffer,
|
||||||
|
void * /*output_buffer*/,
|
||||||
|
unsigned long frames_per_buffer, // NOLINT
|
||||||
|
const PaStreamCallbackTimeInfo * /*time_info*/,
|
||||||
|
PaStreamCallbackFlags /*status_flags*/,
|
||||||
|
void *user_data) {
|
||||||
|
auto dlg = reinterpret_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);
|
||||||
|
auto begin = reinterpret_cast<const float *>(input_buffer);
|
||||||
|
auto end = begin + frames_per_buffer;
|
||||||
|
dlg->samples_.insert(dlg->samples_.end(), begin, end);
|
||||||
|
|
||||||
|
return dlg->started_ ? paContinue : paComplete;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
|
||||||
|
if (!recognizer_) {
|
||||||
|
AppendLineToMultilineEditCtrl("Creating recognizer...");
|
||||||
|
AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
|
||||||
|
InitRecognizer();
|
||||||
|
if (!recognizer_) {
|
||||||
|
// failed to create the recognizer
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
AppendLineToMultilineEditCtrl("Recognizer created!");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!started_) {
|
||||||
|
samples_.clear();
|
||||||
|
started_ = true;
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = Pa_GetDefaultInputDevice();
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
|
param.channelCount = 1;
|
||||||
|
param.sampleFormat = paFloat32;
|
||||||
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float sample_rate = config_.feat_config.sample_rate;
|
||||||
|
pa_stream_ = nullptr;
|
||||||
|
PaError err =
|
||||||
|
Pa_OpenStream(&pa_stream_, ¶m, nullptr, /* &outputParameters, */
|
||||||
|
sample_rate,
|
||||||
|
0, // frames per buffer
|
||||||
|
paClipOff, // we won't output out of range samples
|
||||||
|
// so don't bother clipping them
|
||||||
|
RecordCallback, this);
|
||||||
|
if (err != paNoError) {
|
||||||
|
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
|
||||||
|
Pa_GetErrorText(err));
|
||||||
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = Pa_StartStream(pa_stream_);
|
||||||
|
if (err != paNoError) {
|
||||||
|
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
|
||||||
|
Pa_GetErrorText(err));
|
||||||
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
AppendLineToMultilineEditCtrl(
|
||||||
|
"\r\nStarted! Please speak and click stop.\r\n");
|
||||||
|
my_btn_.SetWindowText(_T("Stop"));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
started_ = false;
|
||||||
|
|
||||||
|
Pa_Sleep(200); // sleep for 200ms
|
||||||
|
if (pa_stream_) {
|
||||||
|
PaError err = Pa_CloseStream(pa_stream_);
|
||||||
|
if (err != paNoError) {
|
||||||
|
AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
|
||||||
|
Pa_GetErrorText(err));
|
||||||
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pa_stream_ = nullptr;
|
||||||
|
|
||||||
|
SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer_);
|
||||||
|
|
||||||
|
AcceptWaveformOffline(stream, config_.feat_config.sample_rate,
|
||||||
|
samples_.data(), samples_.size());
|
||||||
|
DecodeOfflineStream(recognizer_, stream);
|
||||||
|
SherpaOnnxOfflineRecognizerResult *r = GetOfflineStreamResult(stream);
|
||||||
|
results_.emplace_back(r->text);
|
||||||
|
|
||||||
|
auto str = Utf8ToUtf16(Cat(results_).c_str());
|
||||||
|
my_text_.SetWindowText(str.c_str());
|
||||||
|
my_text_.SetFocus();
|
||||||
|
my_text_.SetSel(-1);
|
||||||
|
|
||||||
|
DestroyOfflineRecognizerResult(r);
|
||||||
|
|
||||||
|
DestroyOfflineStream(stream);
|
||||||
|
// AfxMessageBox("Stopped", MB_OK);
|
||||||
|
my_btn_.SetWindowText(_T("Start"));
|
||||||
|
AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::InitMicrophone() {
|
||||||
|
int default_device = Pa_GetDefaultInputDevice();
|
||||||
|
int device_count = Pa_GetDeviceCount();
|
||||||
|
if (default_device == paNoDevice) {
|
||||||
|
// CString str;
|
||||||
|
// str.Format(_T("No default input device found!"));
|
||||||
|
// AfxMessageBox(str, MB_OK | MB_ICONSTOP);
|
||||||
|
// exit(-1);
|
||||||
|
AppendLineToMultilineEditCtrl("No default input device found!");
|
||||||
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
AppendLineToMultilineEditCtrl(std::string("Selected device ") +
|
||||||
|
Pa_GetDeviceInfo(default_device)->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when `filename` can be opened for reading.
bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  return std::ifstream(filename).good();
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
|
||||||
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
std::string msg =
|
||||||
|
"\r\nPlease go to\r\n"
|
||||||
|
"https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
|
||||||
|
"\r\n";
|
||||||
|
msg += "to download a non-streaming model, i.e., an offline model.\r\n";
|
||||||
|
msg +=
|
||||||
|
"You need to rename them to encoder.onnx, decoder.onnx, and "
|
||||||
|
"joiner.onnx correspoondingly.\r\n\r\n";
|
||||||
|
msg += "It supports both transducer models and paraformer models.\r\n\r\n";
|
||||||
|
msg +=
|
||||||
|
"We give two examples below to show you how to download models\r\n\r\n";
|
||||||
|
msg += "(1) Transducer\r\n\r\n";
|
||||||
|
msg +=
|
||||||
|
"We use "
|
||||||
|
"https://huggingface.co/pkufool/"
|
||||||
|
"icefall-asr-zipformer-wenetspeech-20230615 below\r\n";
|
||||||
|
msg +=
|
||||||
|
"wget "
|
||||||
|
"https://huggingface.co/pkufool/"
|
||||||
|
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
|
||||||
|
"encoder-epoch-12-avg-4.onnx\r\n";
|
||||||
|
msg +=
|
||||||
|
"wget "
|
||||||
|
"https://huggingface.co/pkufool/"
|
||||||
|
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
|
||||||
|
"decoder-epoch-12-avg-4.onnx\r\n";
|
||||||
|
msg +=
|
||||||
|
"wget "
|
||||||
|
"https://huggingface.co/pkufool/"
|
||||||
|
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
|
||||||
|
"joiner-epoch-12-avg-4.onnx\r\n";
|
||||||
|
msg += "\r\n Now rename them\r\n";
|
||||||
|
msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n";
|
||||||
|
msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n";
|
||||||
|
msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n";
|
||||||
|
msg += "(2) Paraformer\r\n\r\n";
|
||||||
|
msg +=
|
||||||
|
"wget "
|
||||||
|
"https://huggingface.co/csukuangfj/"
|
||||||
|
"sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx\r\n";
|
||||||
|
msg +=
|
||||||
|
"wget "
|
||||||
|
"https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
|
||||||
|
"resolve/main/tokens.txt\r\n\r\n";
|
||||||
|
msg += "\r\n Now rename them\r\n";
|
||||||
|
msg += "mv model.onnx paraformer.onnx\r\n";
|
||||||
|
msg += "\r\n";
|
||||||
|
msg += "That's it!\r\n";
|
||||||
|
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
|
||||||
|
std::string paraformer = "./paraformer.onnx";
|
||||||
|
std::string tokens = "./tokens.txt";
|
||||||
|
|
||||||
|
bool is_ok = true;
|
||||||
|
|
||||||
|
if (Exists("./paraformer.int8.onnx")) {
|
||||||
|
paraformer = "./paraformer.int8.onnx";
|
||||||
|
} else if (!Exists(paraformer)) {
|
||||||
|
std::string msg = paraformer + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Exists(tokens)) {
|
||||||
|
std::string msg = tokens + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_ok) {
|
||||||
|
ShowInitRecognizerHelpMessage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(&config_, 0, sizeof(config_));
|
||||||
|
|
||||||
|
config_.feat_config.sample_rate = 16000;
|
||||||
|
config_.feat_config.feature_dim = 80;
|
||||||
|
|
||||||
|
config_.model_config.paraformer.model = paraformer.c_str();
|
||||||
|
config_.model_config.tokens = tokens.c_str();
|
||||||
|
config_.model_config.num_threads = 1;
|
||||||
|
config_.model_config.debug = 1;
|
||||||
|
|
||||||
|
config_.decoding_method = "greedy_search";
|
||||||
|
config_.max_active_paths = 4;
|
||||||
|
|
||||||
|
recognizer_ = CreateOfflineRecognizer(&config_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
|
||||||
|
if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
|
||||||
|
InitParaformer();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// assume it is transducer
|
||||||
|
|
||||||
|
std::string encoder = "./encoder.onnx";
|
||||||
|
std::string decoder = "./decoder.onnx";
|
||||||
|
std::string joiner = "./joiner.onnx";
|
||||||
|
std::string tokens = "./tokens.txt";
|
||||||
|
|
||||||
|
bool is_ok = true;
|
||||||
|
if (!Exists(encoder)) {
|
||||||
|
std::string msg = encoder + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Exists(decoder)) {
|
||||||
|
std::string msg = decoder + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Exists(joiner)) {
|
||||||
|
std::string msg = joiner + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Exists(tokens)) {
|
||||||
|
std::string msg = tokens + " does not exist!";
|
||||||
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
|
is_ok = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_ok) {
|
||||||
|
ShowInitRecognizerHelpMessage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memset(&config_, 0, sizeof(config_));
|
||||||
|
|
||||||
|
config_.feat_config.sample_rate = 16000;
|
||||||
|
config_.feat_config.feature_dim = 80;
|
||||||
|
|
||||||
|
config_.model_config.transducer.encoder = encoder.c_str();
|
||||||
|
config_.model_config.transducer.decoder = decoder.c_str();
|
||||||
|
config_.model_config.transducer.joiner = joiner.c_str();
|
||||||
|
config_.model_config.tokens = tokens.c_str();
|
||||||
|
config_.model_config.num_threads = 1;
|
||||||
|
config_.model_config.debug = 0;
|
||||||
|
|
||||||
|
config_.decoding_method = "greedy_search";
|
||||||
|
config_.max_active_paths = 4;
|
||||||
|
|
||||||
|
recognizer_ = CreateOfflineRecognizer(&config_);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
|
||||||
|
const std::string &s) {
|
||||||
|
// get the initial text length
|
||||||
|
int nLength = my_text_.GetWindowTextLength();
|
||||||
|
// put the selection at the end of text
|
||||||
|
my_text_.SetSel(nLength, nLength);
|
||||||
|
// replace the selection
|
||||||
|
|
||||||
|
std::wstring wstr = Utf8ToUtf16(s);
|
||||||
|
|
||||||
|
my_text_.ReplaceSel(wstr.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
|
||||||
|
const std::string &s) {
|
||||||
|
AppendTextToEditCtrl("\r\n" + s);
|
||||||
|
}
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
|
||||||
|
// NonStreamingSpeechRecognitionDlg.h : header file
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
#include <string>
#include <vector>

#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"
|
||||||
|
|
||||||
|
// Helper whose constructor/destructor pair is defined outside this header.
// NOTE(review): presumably it scopes PortAudio initialization/termination to
// the lifetime of the object -- confirm against the implementation.
class Microphone {
 public:
  Microphone();
  ~Microphone();

  // The class declares its own destructor, so an implicit copy would run that
  // cleanup twice for one acquisition. Forbid copying.
  Microphone(const Microphone &) = delete;
  Microphone &operator=(const Microphone &) = delete;
};
|
||||||
|
|
||||||
|
// CNonStreamingSpeechRecognitionDlg dialog
|
||||||
|
class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
|
||||||
|
// Construction
|
||||||
|
public:
|
||||||
|
CNonStreamingSpeechRecognitionDlg(
|
||||||
|
CWnd *pParent = nullptr); // standard constructor
|
||||||
|
~CNonStreamingSpeechRecognitionDlg();
|
||||||
|
|
||||||
|
// Dialog Data
|
||||||
|
#ifdef AFX_DESIGN_TIME
|
||||||
|
enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void DoDataExchange(CDataExchange *pDX); // DDX/DDV support
|
||||||
|
|
||||||
|
// Implementation
|
||||||
|
protected:
|
||||||
|
HICON m_hIcon;
|
||||||
|
|
||||||
|
// Generated message map functions
|
||||||
|
virtual BOOL OnInitDialog();
|
||||||
|
afx_msg void OnPaint();
|
||||||
|
afx_msg HCURSOR OnQueryDragIcon();
|
||||||
|
DECLARE_MESSAGE_MAP()
|
||||||
|
public:
|
||||||
|
afx_msg void OnBnClickedOk();
|
||||||
|
int RunThread();
|
||||||
|
|
||||||
|
private:
|
||||||
|
Microphone mic_;
|
||||||
|
|
||||||
|
SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
|
||||||
|
SherpaOnnxOfflineRecognizerConfig config_;
|
||||||
|
|
||||||
|
PaStream *pa_stream_ = nullptr;
|
||||||
|
CButton my_btn_;
|
||||||
|
CEdit my_text_;
|
||||||
|
std::vector<std::string> results_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool started_ = false;
|
||||||
|
std::vector<float> samples_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void AppendTextToEditCtrl(const std::string &s);
|
||||||
|
void AppendLineToMultilineEditCtrl(const std::string &s);
|
||||||
|
void InitMicrophone();
|
||||||
|
|
||||||
|
bool Exists(const std::string &filename);
|
||||||
|
void InitRecognizer();
|
||||||
|
|
||||||
|
void InitParaformer();
|
||||||
|
void ShowInitRecognizerHelpMessage();
|
||||||
|
};
|
||||||
18
mfc-examples/NonStreamingSpeechRecognition/Resource.h
Normal file
18
mfc-examples/NonStreamingSpeechRecognition/Resource.h
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
//{{NO_DEPENDENCIES}}
|
||||||
|
// Microsoft Visual C++ generated include file.
|
||||||
|
// Used by NonStreamingSpeechRecognition.rc
|
||||||
|
//
|
||||||
|
#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
|
||||||
|
#define IDR_MAINFRAME 128
|
||||||
|
#define IDC_EDIT1 1000
|
||||||
|
|
||||||
|
// Next default values for new objects
|
||||||
|
//
|
||||||
|
#ifdef APSTUDIO_INVOKED
|
||||||
|
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||||
|
#define _APS_NEXT_RESOURCE_VALUE 130
|
||||||
|
#define _APS_NEXT_COMMAND_VALUE 32771
|
||||||
|
#define _APS_NEXT_CONTROL_VALUE 1001
|
||||||
|
#define _APS_NEXT_SYMED_VALUE 101
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
26
mfc-examples/NonStreamingSpeechRecognition/framework.h
Normal file
26
mfc-examples/NonStreamingSpeechRecognition/framework.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef VC_EXTRALEAN
|
||||||
|
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "targetver.h"
|
||||||
|
|
||||||
|
#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be
|
||||||
|
// explicit
|
||||||
|
|
||||||
|
// turns off MFC's hiding of some common and often safely ignored warning
|
||||||
|
// messages
|
||||||
|
#define _AFX_ALL_WARNINGS
|
||||||
|
|
||||||
|
#include <afxext.h> // MFC extensions
|
||||||
|
#include <afxwin.h> // MFC core and standard components
|
||||||
|
|
||||||
|
#ifndef _AFX_NO_OLE_SUPPORT
|
||||||
|
#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
|
||||||
|
#endif
|
||||||
|
#ifndef _AFX_NO_AFXCMN_SUPPORT
|
||||||
|
#include <afxcmn.h> // MFC support for Windows Common Controls
|
||||||
|
#endif // _AFX_NO_AFXCMN_SUPPORT
|
||||||
|
|
||||||
|
#include <afxcontrolbars.h> // MFC support for ribbons and control bars
|
||||||
6
mfc-examples/NonStreamingSpeechRecognition/pch.cpp
Normal file
6
mfc-examples/NonStreamingSpeechRecognition/pch.cpp
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
// pch.cpp: source file corresponding to the pre-compiled header
|
||||||
|
|
||||||
|
#include "pch.h"
|
||||||
|
|
||||||
|
// When you are using pre-compiled headers, this source file is necessary for
|
||||||
|
// compilation to succeed.
|
||||||
15
mfc-examples/NonStreamingSpeechRecognition/pch.h
Normal file
15
mfc-examples/NonStreamingSpeechRecognition/pch.h
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
// pch.h: This is a precompiled header file.
|
||||||
|
// Files listed below are compiled only once, improving build performance for
|
||||||
|
// future builds. This also affects IntelliSense performance, including code
|
||||||
|
// completion and many code browsing features. However, files listed here are
|
||||||
|
// ALL re-compiled if any one of them is updated between builds. Do not add
|
||||||
|
// files here that you will be updating frequently as this negates the
|
||||||
|
// performance advantage.
|
||||||
|
|
||||||
|
#ifndef PCH_H
|
||||||
|
#define PCH_H
|
||||||
|
|
||||||
|
// add headers that you want to pre-compile here
|
||||||
|
#include "framework.h"
|
||||||
|
|
||||||
|
#endif // PCH_H
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 66 KiB |
Binary file not shown.
@@ -0,0 +1,50 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ImportGroup Label="PropertySheets" />
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup>
|
||||||
|
<SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
|
||||||
|
<SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
|
||||||
|
<SherpaOnnxLibraries>
|
||||||
|
sherpa-onnx-portaudio_static.lib;
|
||||||
|
sherpa-onnx-c-api.lib;
|
||||||
|
sherpa-onnx-core.lib;
|
||||||
|
kaldi-native-fbank-core.lib;
|
||||||
|
absl_base.lib;
|
||||||
|
absl_city.lib;
|
||||||
|
absl_hash.lib;
|
||||||
|
absl_low_level_hash.lib;
|
||||||
|
absl_raw_hash_set.lib;
|
||||||
|
absl_raw_logging_internal.lib;
|
||||||
|
absl_throw_delegate.lib;
|
||||||
|
clog.lib;
|
||||||
|
cpuinfo.lib;
|
||||||
|
flatbuffers.lib;
|
||||||
|
libprotobuf-lite.lib;
|
||||||
|
onnx.lib;
|
||||||
|
onnx_proto.lib;
|
||||||
|
onnxruntime_common.lib;
|
||||||
|
onnxruntime_flatbuffers.lib;
|
||||||
|
onnxruntime_framework.lib;
|
||||||
|
onnxruntime_graph.lib;
|
||||||
|
onnxruntime_mlas.lib;
|
||||||
|
onnxruntime_optimizer.lib;
|
||||||
|
onnxruntime_providers.lib;
|
||||||
|
onnxruntime_session.lib;
|
||||||
|
onnxruntime_util.lib;
|
||||||
|
re2.lib;
|
||||||
|
</SherpaOnnxLibraries>
|
||||||
|
</PropertyGroup>
|
||||||
|
<ItemDefinitionGroup>
|
||||||
|
<ClCompile>
|
||||||
|
<AdditionalIncludeDirectories>
|
||||||
|
$(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
|
||||||
|
$(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
|
<AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup />
|
||||||
|
</Project>
|
||||||
9
mfc-examples/NonStreamingSpeechRecognition/targetver.h
Normal file
9
mfc-examples/NonStreamingSpeechRecognition/targetver.h
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Including SDKDDKVer.h defines the highest available Windows platform.
|
||||||
|
|
||||||
|
// If you wish to build your application for a previous Windows platform,
|
||||||
|
// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
|
||||||
|
// to support before including SDKDDKVer.h.
|
||||||
|
|
||||||
|
#include <SDKDDKVer.h>
|
||||||
@@ -3,11 +3,19 @@
|
|||||||
This directory contains examples showing how to use Next-gen Kaldi in MFC
|
This directory contains examples showing how to use Next-gen Kaldi in MFC
|
||||||
for speech recognition.
|
for speech recognition.
|
||||||
|
|
||||||
Caution: You need to use Windows and install Visual Studio in order to run it.
|
Caution: You need to use Windows and install Visual Studio 2022 in order to
|
||||||
|
compile it.
|
||||||
|
|
||||||
|
Hint: If you don't want to install Visual Studio, see below for how to
|
||||||
|
download a pre-compiled `exe`.
|
||||||
|
|
||||||
We use bash script below to demonstrate how to use it. Please change
|
We use bash script below to demonstrate how to use it. Please change
|
||||||
the commands accordingly for Windows.
|
the commands accordingly for Windows.
|
||||||
|
|
||||||
## Streaming speech recognition
|
## How to compile
|
||||||
|
|
||||||
|
|
||||||
|
First, we need to compile sherpa-onnx:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir -p $HOME/open-source
|
mkdir -p $HOME/open-source
|
||||||
@@ -19,7 +27,6 @@ mkdir build
|
|||||||
|
|
||||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
|
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
|
||||||
cmake --build . --config Release --target install
|
cmake --build . --config Release --target install
|
||||||
|
|
||||||
cd ../mfc-examples
|
cd ../mfc-examples
|
||||||
|
|
||||||
msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64
|
msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64
|
||||||
@@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6
|
|||||||
# now run the program
|
# now run the program
|
||||||
|
|
||||||
./x64/Release/StreamingSpeechRecognition.exe
|
./x64/Release/StreamingSpeechRecognition.exe
|
||||||
|
./x64/Release/NonStreamingSpeechRecognition.exe
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that we also need to download pre-trained models. Please
|
If you don't want to compile the project by yourself, you can download
|
||||||
refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases
|
||||||
for a list of streaming models.
|
|
||||||
|
|
||||||
We use the following model for demonstration.
|
For instance, you can use the following addresses:
|
||||||
|
|
||||||
```bash
|
- https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
|
||||||
cd $HOME/open-source/sherpa-onnx/mfc-examples/x64/Release
|
- https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe
|
||||||
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
|
||||||
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx
|
|
||||||
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
|
|
||||||
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
|
|
||||||
|
|
||||||
# now rename
|
|
||||||
mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx
|
|
||||||
mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx
|
|
||||||
mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx
|
|
||||||
|
|
||||||
# Now run it!
|
|
||||||
./StreamingSpeechRecognition.exe
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -3,12 +3,14 @@
|
|||||||
// application.
|
// application.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "framework.h"
|
#include "framework.h"
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
#include "StreamingSpeechRecognition.h"
|
#include "StreamingSpeechRecognition.h"
|
||||||
#include "StreamingSpeechRecognitionDlg.h"
|
|
||||||
|
|
||||||
|
#include "StreamingSpeechRecognitionDlg.h"
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
#define new DEBUG_NEW
|
#define new DEBUG_NEW
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
|
|
||||||
// StreamingSpeechRecognitionDlg.cpp : implementation file
|
// StreamingSpeechRecognitionDlg.cpp : implementation file
|
||||||
//
|
//
|
||||||
|
// clang-format off
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "framework.h"
|
#include "framework.h"
|
||||||
#include "afxdialogex.h"
|
#include "afxdialogex.h"
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
#include "StreamingSpeechRecognitionDlg.h"
|
#include "StreamingSpeechRecognitionDlg.h"
|
||||||
|
|
||||||
@@ -15,7 +16,6 @@
|
|||||||
|
|
||||||
#include "StreamingSpeechRecognition.h"
|
#include "StreamingSpeechRecognition.h"
|
||||||
|
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
#define new DEBUG_NEW
|
#define new DEBUG_NEW
|
||||||
#endif
|
#endif
|
||||||
@@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() {
|
|||||||
// exit(-1);
|
// exit(-1);
|
||||||
AppendLineToMultilineEditCtrl("No default input device found!");
|
AppendLineToMultilineEditCtrl("No default input device found!");
|
||||||
my_btn_.EnableWindow(FALSE);
|
my_btn_.EnableWindow(FALSE);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
AppendLineToMultilineEditCtrl(std::string("Selected device ") +
|
AppendLineToMultilineEditCtrl(std::string("Selected device ") +
|
||||||
Pa_GetDeviceInfo(default_device)->name);
|
Pa_GetDeviceInfo(default_device)->name);
|
||||||
@@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() {
|
|||||||
msg += "\r\n";
|
msg += "\r\n";
|
||||||
msg += "That's it!\r\n";
|
msg += "That's it!\r\n";
|
||||||
|
|
||||||
|
|
||||||
AppendLineToMultilineEditCtrl(msg);
|
AppendLineToMultilineEditCtrl(msg);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
|
|||||||
// put the selection at the end of text
|
// put the selection at the end of text
|
||||||
my_text_.SetSel(nLength, nLength);
|
my_text_.SetSel(nLength, nLength);
|
||||||
// replace the selection
|
// replace the selection
|
||||||
CString str;
|
|
||||||
str.Format(_T("%s"), s.c_str());
|
|
||||||
|
|
||||||
std::wstring wstr = Utf8ToUtf16(s);
|
std::wstring wstr = Utf8ToUtf16(s);
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio Version 16
|
# Visual Studio Version 17
|
||||||
VisualStudioVersion = 16.0.32630.194
|
VisualStudioVersion = 17.6.33829.357
|
||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|x64 = Debug|x64
|
Debug|x64 = Debug|x64
|
||||||
@@ -21,6 +23,14 @@ Global
|
|||||||
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
|
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
|
||||||
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
|
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
|
||||||
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
|
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
|
||||||
|
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
@@ -27,36 +27,38 @@ struct SherpaOnnxDisplay {
|
|||||||
std::unique_ptr<sherpa_onnx::Display> impl;
|
std::unique_ptr<sherpa_onnx::Display> impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Evaluates to x when x is truthy (non-zero / non-null) and to the fallback y
// otherwise. Every argument is parenthesized so that compound expressions
// (e.g. a conditional expression) expand as one unit. Note that x appears
// twice in the expansion, so pass only side-effect-free expressions.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))
|
||||||
|
|
||||||
SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
||||||
const SherpaOnnxOnlineRecognizerConfig *config) {
|
const SherpaOnnxOnlineRecognizerConfig *config) {
|
||||||
sherpa_onnx::OnlineRecognizerConfig recognizer_config;
|
sherpa_onnx::OnlineRecognizerConfig recognizer_config;
|
||||||
|
|
||||||
recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
|
recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
|
||||||
recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
|
recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
|
||||||
|
|
||||||
recognizer_config.model_config.encoder_filename =
|
recognizer_config.model_config.encoder_filename =
|
||||||
config->model_config.encoder;
|
SHERPA_ONNX_OR(config->model_config.encoder, "");
|
||||||
recognizer_config.model_config.decoder_filename =
|
recognizer_config.model_config.decoder_filename =
|
||||||
config->model_config.decoder;
|
SHERPA_ONNX_OR(config->model_config.decoder, "");
|
||||||
recognizer_config.model_config.joiner_filename = config->model_config.joiner;
|
recognizer_config.model_config.joiner_filename = SHERPA_ONNX_OR(config->model_config.joiner, "");
|
||||||
recognizer_config.model_config.tokens = config->model_config.tokens;
|
recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
|
||||||
recognizer_config.model_config.num_threads = config->model_config.num_threads;
|
recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||||
recognizer_config.model_config.provider = config->model_config.provider;
|
recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||||
recognizer_config.model_config.debug = config->model_config.debug;
|
recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||||
|
|
||||||
recognizer_config.decoding_method = config->decoding_method;
|
recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
|
||||||
recognizer_config.max_active_paths = config->max_active_paths;
|
recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
|
||||||
|
|
||||||
recognizer_config.enable_endpoint = config->enable_endpoint;
|
recognizer_config.enable_endpoint = SHERPA_ONNX_OR(config->enable_endpoint, 0);
|
||||||
|
|
||||||
recognizer_config.endpoint_config.rule1.min_trailing_silence =
|
recognizer_config.endpoint_config.rule1.min_trailing_silence =
|
||||||
config->rule1_min_trailing_silence;
|
SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4);
|
||||||
|
|
||||||
recognizer_config.endpoint_config.rule2.min_trailing_silence =
|
recognizer_config.endpoint_config.rule2.min_trailing_silence =
|
||||||
config->rule2_min_trailing_silence;
|
SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2);
|
||||||
|
|
||||||
recognizer_config.endpoint_config.rule3.min_utterance_length =
|
recognizer_config.endpoint_config.rule3.min_utterance_length =
|
||||||
config->rule3_min_utterance_length;
|
SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20);
|
||||||
|
|
||||||
if (config->model_config.debug) {
|
if (config->model_config.debug) {
|
||||||
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
||||||
@@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
|
|||||||
const SherpaOnnxOfflineRecognizerConfig *config) {
|
const SherpaOnnxOfflineRecognizerConfig *config) {
|
||||||
sherpa_onnx::OfflineRecognizerConfig recognizer_config;
|
sherpa_onnx::OfflineRecognizerConfig recognizer_config;
|
||||||
|
|
||||||
recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
|
recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
|
||||||
|
|
||||||
recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
|
recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
|
||||||
|
|
||||||
recognizer_config.model_config.transducer.encoder_filename =
|
recognizer_config.model_config.transducer.encoder_filename =
|
||||||
config->model_config.transducer.encoder;
|
SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
|
||||||
|
|
||||||
recognizer_config.model_config.transducer.decoder_filename =
|
recognizer_config.model_config.transducer.decoder_filename =
|
||||||
config->model_config.transducer.decoder;
|
SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
|
||||||
|
|
||||||
recognizer_config.model_config.transducer.joiner_filename =
|
recognizer_config.model_config.transducer.joiner_filename =
|
||||||
config->model_config.transducer.joiner;
|
SHERPA_ONNX_OR(config->model_config.transducer.joiner,"");
|
||||||
|
|
||||||
recognizer_config.model_config.paraformer.model =
|
recognizer_config.model_config.paraformer.model =
|
||||||
config->model_config.paraformer.model;
|
SHERPA_ONNX_OR(config->model_config.paraformer.model, "");
|
||||||
|
|
||||||
recognizer_config.model_config.nemo_ctc.model =
|
recognizer_config.model_config.nemo_ctc.model =
|
||||||
config->model_config.nemo_ctc.model;
|
SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
|
||||||
|
|
||||||
recognizer_config.model_config.tokens = config->model_config.tokens;
|
recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
|
||||||
recognizer_config.model_config.num_threads = config->model_config.num_threads;
|
recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||||
recognizer_config.model_config.debug = config->model_config.debug;
|
recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||||
|
|
||||||
recognizer_config.lm_config.model = config->lm_config.model;
|
recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, "");
|
||||||
recognizer_config.lm_config.scale = config->lm_config.scale;
|
recognizer_config.lm_config.scale = SHERPA_ONNX_OR(config->lm_config.scale, 1.0);
|
||||||
|
|
||||||
recognizer_config.decoding_method = config->decoding_method;
|
recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
|
||||||
recognizer_config.max_active_paths = config->max_active_paths;
|
recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
|
||||||
|
|
||||||
if (config->model_config.debug) {
|
if (config->model_config.debug) {
|
||||||
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
||||||
|
|||||||
Reference in New Issue
Block a user