Refactor C# code and support building nuget packages for cross-platforms (#144)
This commit is contained in:
135
.github/workflows/dot-net.yaml
vendored
Normal file
135
.github/workflows/dot-net.yaml
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
name: dot-net
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- dot-net
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
concurrency:
|
||||
group: dot-net-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build-libs:
|
||||
name: dot-net for ${{ matrix.os }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
|
||||
# for a list of versions
|
||||
- name: Build wheels
|
||||
uses: pypa/cibuildwheel@v2.11.4
|
||||
env:
|
||||
CIBW_BEFORE_BUILD: "pip install -U cmake numpy"
|
||||
CIBW_BUILD: "cp38-*64"
|
||||
CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
|
||||
CIBW_BUILD_VERBOSITY: 3
|
||||
CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib'
|
||||
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/*.whl
|
||||
unzip -l ./wheelhouse/*.whl
|
||||
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ matrix.os }}-wheels
|
||||
path: ./wheelhouse/*.whl
|
||||
|
||||
build-nuget-packages:
|
||||
name: build-nuget-packages
|
||||
runs-on: ubuntu-latest
|
||||
needs: build-libs
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Retrieve artifact from ubuntu-latest
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: ubuntu-latest-wheels
|
||||
path: ./linux
|
||||
|
||||
- name: Retrieve artifact from macos-latest
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: macos-latest-wheels
|
||||
path: ./macos
|
||||
|
||||
- name: Retrieve artifact from windows-latest
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: windows-latest-wheels
|
||||
path: ./windows
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
tree .
|
||||
|
||||
- name: Unzip Ubuntu wheels
|
||||
shell: bash
|
||||
run: |
|
||||
cd linux
|
||||
unzip ./*.whl
|
||||
tree .
|
||||
|
||||
- name: Unzip macOS wheels
|
||||
shell: bash
|
||||
run: |
|
||||
cd macos
|
||||
unzip ./*.whl
|
||||
tree .
|
||||
|
||||
- name: Unzip Windows wheels
|
||||
shell: bash
|
||||
run: |
|
||||
cd windows
|
||||
unzip ./*.whl
|
||||
cp -v ./*.dll sherpa_onnx/lib/
|
||||
tree .
|
||||
|
||||
- name: Setup .NET Core 3.1
|
||||
uses: actions/setup-dotnet@v1
|
||||
with:
|
||||
dotnet-version: 3.1.x
|
||||
|
||||
- name: Setup .NET 7.0
|
||||
uses: actions/setup-dotnet@v1
|
||||
with:
|
||||
dotnet-version: 7.0.x
|
||||
|
||||
- name: Check dotnet
|
||||
run: dotnet --info
|
||||
|
||||
- name: build nuget packages
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/dotnet
|
||||
./run.sh
|
||||
ls -lh packages
|
||||
|
||||
- uses: actions/upload-artifact@v2
|
||||
name: upload nuget packages
|
||||
with:
|
||||
name: nuget-packages
|
||||
path: scripts/dotnet/packages/*.nupkg
|
||||
|
||||
- name: publish .Net packages to nuget.org
|
||||
if: github.repository == 'csukuangfj/sherpa-onnx' || github.repository == 'k2-fsa/sherpa-onnx'
|
||||
shell: bash
|
||||
env:
|
||||
API_KEY: ${{ secrets.NUGET_API_KEY }}
|
||||
run: |
|
||||
# API_KEY is valid until 2024.05.02
|
||||
cd scripts/dotnet/packages
|
||||
# Quote the secret so an empty or whitespace-containing value is still passed as one argument.
dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key "$API_KEY" --source https://api.nuget.org/v3/index.json
|
||||
70
.github/workflows/test-dot-net.yaml
vendored
Normal file
70
.github/workflows/test-dot-net.yaml
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
name: test-dot-net
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
# Must name the workflow file exactly (including the extension) for the path filter to match it.
- '.github/workflows/test-dot-net.yaml'
|
||||
- 'dotnet-examples/**'
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
# Must name the workflow file exactly (including the extension) for the path filter to match it.
- '.github/workflows/test-dot-net.yaml'
|
||||
- 'dotnet-examples/**'
|
||||
|
||||
schedule:
|
||||
# minute (0-59)
|
||||
# hour (0-23)
|
||||
# day of the month (1-31)
|
||||
# month (1-12)
|
||||
# day of the week (0-6)
|
||||
# nightly build at 23:50 UTC time every day
|
||||
- cron: "50 23 * * *"
|
||||
|
||||
concurrency:
|
||||
# Scope the group by ref so runs for different branches / pull requests do not cancel one another.
group: test-dot-net-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
test-dot-net:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup .NET Core 3.1
|
||||
uses: actions/setup-dotnet@v1
|
||||
with:
|
||||
dotnet-version: 3.1.x
|
||||
|
||||
- name: Setup .NET 6.0
|
||||
uses: actions/setup-dotnet@v1
|
||||
with:
|
||||
dotnet-version: 6.0.x
|
||||
|
||||
- name: Check dotnet
|
||||
run: dotnet --info
|
||||
|
||||
- name: Decode a file
|
||||
shell: bash
|
||||
run: |
|
||||
cd dotnet-examples/
|
||||
cd online-decode-files
|
||||
./run.sh
|
||||
|
||||
cd ../offline-decode-files
|
||||
./run-nemo-ctc.sh
|
||||
./run-paraformer.sh
|
||||
./run-zipformer.sh
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -57,3 +57,4 @@ sherpa-onnx-nemo-ctc-en-citrinet-512
|
||||
run-offline-decode-files-nemo-ctc.sh
|
||||
*.jar
|
||||
sherpa-onnx-nemo-ctc-*
|
||||
*.wav
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
project(sherpa-onnx)
|
||||
|
||||
set(SHERPA_ONNX_VERSION "1.4.1")
|
||||
set(SHERPA_ONNX_VERSION "1.4.2")
|
||||
|
||||
# Disable warning about
|
||||
#
|
||||
@@ -37,16 +37,12 @@ endif()
|
||||
set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
|
||||
set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
|
||||
|
||||
if(BUILD_SHARED_LIBS AND MSVC)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
if(DEFINED ANDROID_ABI)
|
||||
if(DEFINED ANDROID_ABI AND NOT SHERPA_ONNX_ENABLE_JNI)
|
||||
message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android")
|
||||
set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
@@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS)
|
||||
set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS AND MSVC)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
|
||||
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
||||
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
|
||||
|
||||
@@ -41,7 +41,6 @@ try:
|
||||
# -linux_x86_64.whl
|
||||
self.root_is_pure = False
|
||||
|
||||
|
||||
except ImportError:
|
||||
bdist_wheel = None
|
||||
|
||||
@@ -78,7 +77,6 @@ class BuildExtension(build_ext):
|
||||
extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF "
|
||||
extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON "
|
||||
extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON "
|
||||
extra_cmake_args += " -DSHERPA_ONNX_ENABLE_C_API=OFF "
|
||||
extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON "
|
||||
|
||||
if "PYTHON_EXECUTABLE" not in cmake_args:
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
// See https://aka.ms/new-console-template for more information
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
using SherpaOnnx;
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
|
||||
/// download model eg:
|
||||
/// (The directory where the application runs)
|
||||
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||
/// cd /path/to
|
||||
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
|
||||
/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example
|
||||
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512
|
||||
|
||||
/// NuGet for sherpa-onnx
|
||||
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||
|
||||
// transducer Usage:
|
||||
/*
|
||||
.\SherpaOnnx.Examples.exe `
|
||||
--tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
|
||||
--encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
|
||||
--decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
|
||||
--joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=greedy_search `
|
||||
--debug=false `
|
||||
./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav
|
||||
*/
|
||||
|
||||
// paraformer Usage:
|
||||
/*
|
||||
.\SherpaOnnx.Examples.exe `
|
||||
--tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
|
||||
--paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=greedy_search `
|
||||
--debug=false `
|
||||
./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav
|
||||
*/
|
||||
|
||||
// paraformer Usage:
|
||||
/*
|
||||
.\SherpaOnnx.Examples.exe `
|
||||
--tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
|
||||
--paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=greedy_search `
|
||||
--debug=false `
|
||||
./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav
|
||||
*/
|
||||
|
||||
|
||||
/// <summary>
/// Console example that decodes one or more wave files in a single batch with
/// an offline transducer, paraformer, or NeMo CTC model and prints the
/// recognized text followed by timing statistics.
/// </summary>
internal class OfflineDecodeFiles
{
    // Characters stripped from both ends of every option name, option value and
    // file name: leading dashes, PowerShell line-continuation backticks, spaces.
    private static readonly char[] TrimChars = new char[] { '-', '`', ' ' };

    /// <summary>
    /// Entry point. When no command-line arguments are given, the arguments
    /// are read interactively from the console instead.
    /// </summary>
    /// <param name="args">Tokens of the form <c>--name=value</c> plus wave file names.</param>
    static void Main(string[] args)
    {
        string usage = @"
-----------------------------
transducer Usage:
--tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
--encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
--decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
--joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav

paraformer Usage:
--tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
--paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav

nemo Usage:
--tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
--nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
--num-threads=2 `
--decoding-method=greedy_search `
--debug=false `
./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
-----------------------------
";
        if (args.Length == 0)
        {
            System.Console.WriteLine("Please enter the correct parameters:");
            System.Console.WriteLine(usage);
            args = ReadArgsFromConsole();
        }
        Console.WriteLine("Started!\n");

        string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
        List<string> wavFiles = new List<string>();
        Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

        // Model and token files are resolved relative to the application directory.
        string decoder = GetPath(argsDict, "decoder", applicationBase);
        string encoder = GetPath(argsDict, "encoder", applicationBase);
        string joiner = GetPath(argsDict, "joiner", applicationBase);
        string paraformer = GetPath(argsDict, "paraformer", applicationBase);
        string nemo_ctc = GetPath(argsDict, "nemo_ctc", applicationBase);
        string tokens = GetPath(argsDict, "tokens", applicationBase);
        string num_threads = GetValue(argsDict, "num_threads");
        string decodingMethod = GetValue(argsDict, "decoding_method");
        string debug = GetValue(argsDict, "debug");

        // Unparsable values fall back to 0 / false, exactly as TryParse leaves them.
        int.TryParse(num_threads, out int numThreads);
        bool.TryParse(debug, out bool isDebug);

        // A transducer needs all three of its model files.
        bool hasTransducer = !string.IsNullOrEmpty(encoder)
            && !string.IsNullOrEmpty(decoder)
            && !string.IsNullOrEmpty(joiner);
        bool hasParaformer = !string.IsNullOrEmpty(paraformer);
        bool hasNemoCtc = !string.IsNullOrEmpty(nemo_ctc);

        if (!hasTransducer && !hasParaformer && !hasNemoCtc)
        {
            Console.WriteLine("Please specify at least one model");
            Console.WriteLine(usage);
            // Nothing can be decoded without a model, so stop here instead of
            // printing meaningless statistics.
            return;
        }

        // batch decode
        TimeSpan total_duration = TimeSpan.Zero;
        // Stopwatch is monotonic; it only runs while decoding and fetching results.
        System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
        List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();

        // Model priority is unchanged: transducer, then paraformer, then NeMo CTC.
        if (hasTransducer)
        {
            OfflineTransducer offlineTransducer = new OfflineTransducer();
            offlineTransducer.EncoderFilename = encoder;
            offlineTransducer.DecoderFilename = decoder;
            offlineTransducer.JoinerFilename = joiner;

            OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>(
                offlineTransducer,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            List<float[]> samplesList = LoadSamples(wavFiles, ref total_duration);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }
        else if (hasParaformer)
        {
            OfflineParaformer offlineParaformer = new OfflineParaformer();
            offlineParaformer.Model = paraformer;

            OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>(
                offlineParaformer,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            List<float[]> samplesList = LoadSamples(wavFiles, ref total_duration);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }
        else
        {
            OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
            offlineNemoEncDecCtc.Model = nemo_ctc;

            OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>(
                offlineNemoEncDecCtc,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            List<float[]> samplesList = LoadSamples(wavFiles, ref total_duration);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }

        // Pair every result with the wave file it was produced from.
        foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles))
        {
            Console.WriteLine("wavFile:{0}", item.Second);
            Console.WriteLine("text:{0}", item.First.text.ToLower());
            Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
        }

        double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
        Console.WriteLine("num_threads:{0}", num_threads);
        Console.WriteLine("decoding_method:{0}", decodingMethod);
        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

        Console.WriteLine("End!");
    }

    /// <summary>
    /// Reads argument lines from the console until a line is followed by a
    /// bare Enter key press (or standard input ends), one token per line.
    /// </summary>
    private static string[] ReadArgsFromConsole()
    {
        System.Text.StringBuilder sb = new System.Text.StringBuilder();
        while (true)
        {
            var input = Console.ReadLine();
            if (input == null)
            {
                // End of input: there is nothing more to read.
                break;
            }
            sb.AppendLine(input);
            if (Console.ReadKey().Key == ConsoleKey.Enter)
                break;
        }
        // AppendLine uses the platform newline, so accept both "\r\n" and "\n".
        return sb.ToString().Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>Returns the option value combined with <paramref name="baseDir"/>, or "" when the option is absent.</summary>
    private static string GetPath(Dictionary<string, string> argsDict, string key, string baseDir)
    {
        return argsDict.TryGetValue(key, out var value) ? Path.Combine(baseDir, value) : "";
    }

    /// <summary>Returns the raw option value, or "" when the option is absent.</summary>
    private static string GetValue(Dictionary<string, string> argsDict, string key)
    {
        return argsDict.TryGetValue(key, out var value) ? value : "";
    }

    /// <summary>
    /// Loads the samples of every wave file and adds each file's duration to
    /// <paramref name="totalDuration"/>.
    /// </summary>
    private static List<float[]> LoadSamples(List<string> wavFiles, ref TimeSpan totalDuration)
    {
        List<float[]> samplesList = new List<float[]>();
        foreach (string wavFile in wavFiles)
        {
            TimeSpan duration = TimeSpan.Zero;
            float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
            samplesList.Add(samples);
            totalDuration += duration;
        }
        return samplesList;
    }

    /// <summary>
    /// Splits raw argument tokens into named options and wave file names.
    /// </summary>
    /// <param name="args">Raw tokens; <c>name=value</c> tokens are options, all others are wave files.</param>
    /// <param name="applicationBase">Directory that wave file names are combined with.</param>
    /// <param name="wavFiles">List that receives the combined wave file paths.</param>
    /// <returns>
    /// Option names (dashes replaced by underscores) mapped to their trimmed
    /// values. When an option is repeated, the last occurrence wins.
    /// </returns>
    static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
    {
        Dictionary<string, string> argsDict = new Dictionary<string, string>();
        foreach (string input in args)
        {
            // Split on the first '=' only so that values may themselves contain '='.
            int separator = input.IndexOf('=');
            if (separator < 0)
            {
                string fileName = input.Trim(TrimChars);
                if (!string.IsNullOrEmpty(fileName))
                {
                    wavFiles.Add(Path.Combine(applicationBase, fileName));
                }
            }
            else
            {
                string key = input.Substring(0, separator).Trim(TrimChars).Replace("-", "_");
                string value = input.Substring(separator + 1).Trim(TrimChars);
                // The indexer overwrites instead of throwing on a repeated option.
                argsDict[key] = value;
            }
        }
        return argsDict;
    }
}
|
||||
@@ -1,171 +0,0 @@
|
||||
// See https://aka.ms/new-console-template for more information
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
using SherpaOnnx;
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
|
||||
/// download model eg:
|
||||
/// (The directory where the application runs)
|
||||
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||
/// cd /path/to
|
||||
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||
|
||||
/// NuGet for sherpa-onnx
|
||||
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||
|
||||
// transducer Usage:
|
||||
/*
|
||||
.\SherpaOnnx.Examples.exe `
|
||||
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
|
||||
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
|
||||
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
|
||||
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=modified_beam_search `
|
||||
--debug=false `
|
||||
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
|
||||
*/
|
||||
|
||||
/// <summary>
/// Console example that decodes wave files chunk by chunk with an online
/// transducer model, printing the intermediate result after every chunk and
/// timing statistics at the end.
/// </summary>
internal class OnlineDecodeFile
{
    // Characters stripped from both ends of every option name, option value and
    // file name: leading dashes, PowerShell line-continuation backticks, spaces.
    private static readonly char[] TrimChars = new char[] { '-', '`', ' ' };

    /// <summary>
    /// Entry point. When no command-line arguments are given, the arguments
    /// are read interactively from the console instead.
    /// </summary>
    /// <param name="args">Tokens of the form <c>--name=value</c> plus wave file names.</param>
    static void Main(string[] args)
    {
        string usage = @"
-----------------------------
transducer Usage:
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
--num-threads=2 `
--decoding-method=modified_beam_search `
--debug=false `
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
-----------------------------
";
        if (args.Length == 0)
        {
            System.Console.WriteLine("Please enter the correct parameters:");
            System.Console.WriteLine(usage);
            args = ReadArgsFromConsole();
        }
        Console.WriteLine("Started!\n");

        string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
        List<string> wavFiles = new List<string>();
        Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

        // Model and token files are resolved relative to the application directory.
        string decoder = GetPath(argsDict, "decoder", applicationBase);
        string encoder = GetPath(argsDict, "encoder", applicationBase);
        string joiner = GetPath(argsDict, "joiner", applicationBase);
        string tokens = GetPath(argsDict, "tokens", applicationBase);
        string num_threads = GetValue(argsDict, "num_threads");
        string decodingMethod = GetValue(argsDict, "decoding_method");
        string debug = GetValue(argsDict, "debug");

        // Unparsable values fall back to 0 / false, exactly as TryParse leaves them.
        int.TryParse(num_threads, out int numThreads);
        bool.TryParse(debug, out bool isDebug);

        // This example only decodes with a transducer, which needs all three files.
        if (string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
        {
            Console.WriteLine("Please specify at least one model");
            Console.WriteLine(usage);
            // Nothing can be decoded without a model, so stop here instead of
            // printing meaningless statistics.
            return;
        }

        OnlineTransducer onlineTransducer = new OnlineTransducer();
        onlineTransducer.EncoderFilename = encoder;
        onlineTransducer.DecoderFilename = decoder;
        onlineTransducer.JoinerFilename = joiner;

        OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
            onlineTransducer,
            tokens,
            num_threads: numThreads,
            debug: isDebug,
            decoding_method: decodingMethod);

        TimeSpan total_duration = TimeSpan.Zero;
        // The stopwatch accumulates decoding time over ALL files so that it is
        // comparable with total_duration, which is also summed over all files.
        System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
        foreach (string wavFile in wavFiles)
        {
            TimeSpan duration = TimeSpan.Zero;
            List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);
            OnlineStream stream = onlineRecognizer.CreateStream();
            stopwatch.Start();
            foreach (float[] chunk in samplesList)
            {
                onlineRecognizer.AcceptWaveForm(stream, 16000, chunk);
                onlineRecognizer.DecodeStream(stream);
                OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream);
                Console.WriteLine(result_on.text);
            }
            stopwatch.Stop();
            total_duration += duration;
        }

        double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
        Console.WriteLine("num_threads:{0}", num_threads);
        Console.WriteLine("decoding_method:{0}", decodingMethod);
        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

        Console.WriteLine("End!");
    }

    /// <summary>
    /// Reads argument lines from the console until a line is followed by a
    /// bare Enter key press (or standard input ends), one token per line.
    /// </summary>
    private static string[] ReadArgsFromConsole()
    {
        System.Text.StringBuilder sb = new System.Text.StringBuilder();
        while (true)
        {
            var input = Console.ReadLine();
            if (input == null)
            {
                // End of input: there is nothing more to read.
                break;
            }
            sb.AppendLine(input);
            if (Console.ReadKey().Key == ConsoleKey.Enter)
                break;
        }
        // AppendLine uses the platform newline, so accept both "\r\n" and "\n".
        return sb.ToString().Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>Returns the option value combined with <paramref name="baseDir"/>, or "" when the option is absent.</summary>
    private static string GetPath(Dictionary<string, string> argsDict, string key, string baseDir)
    {
        return argsDict.TryGetValue(key, out var value) ? Path.Combine(baseDir, value) : "";
    }

    /// <summary>Returns the raw option value, or "" when the option is absent.</summary>
    private static string GetValue(Dictionary<string, string> argsDict, string key)
    {
        return argsDict.TryGetValue(key, out var value) ? value : "";
    }

    /// <summary>
    /// Splits raw argument tokens into named options and wave file names.
    /// </summary>
    /// <param name="args">Raw tokens; <c>name=value</c> tokens are options, all others are wave files.</param>
    /// <param name="applicationBase">Directory that wave file names are combined with.</param>
    /// <param name="wavFiles">List that receives the combined wave file paths.</param>
    /// <returns>
    /// Option names (dashes replaced by underscores) mapped to their trimmed
    /// values. When an option is repeated, the last occurrence wins.
    /// </returns>
    static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
    {
        Dictionary<string, string> argsDict = new Dictionary<string, string>();
        foreach (string input in args)
        {
            // Split on the first '=' only so that values may themselves contain '='.
            int separator = input.IndexOf('=');
            if (separator < 0)
            {
                string fileName = input.Trim(TrimChars);
                if (!string.IsNullOrEmpty(fileName))
                {
                    wavFiles.Add(Path.Combine(applicationBase, fileName));
                }
            }
            else
            {
                string key = input.Substring(0, separator).Trim(TrimChars).Replace("-", "_");
                string value = input.Substring(separator + 1).Trim(TrimChars);
                // The indexer overwrites instead of throwing on a repeated option.
                argsDict[key] = value;
            }
        }
        return argsDict;
    }
}
|
||||
@@ -1,221 +0,0 @@
|
||||
// See https://aka.ms/new-console-template for more information
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
using SherpaOnnx;
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
|
||||
/// download model eg:
|
||||
/// (The directory where the application runs)
|
||||
/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
|
||||
/// cd /path/to
|
||||
/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||
|
||||
/// NuGet for sherpa-onnx
|
||||
/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
|
||||
/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||
|
||||
// transducer Usage:
|
||||
/*
|
||||
.\SherpaOnnx.Examples.exe `
|
||||
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
|
||||
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
|
||||
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
|
||||
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=modified_beam_search `
|
||||
--debug=false `
|
||||
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
|
||||
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
||||
*/
|
||||
|
||||
internal class OnlineDecodeFiles
|
||||
{
|
||||
static void Main(string[] args)
|
||||
{
|
||||
string usage = @"
|
||||
-----------------------------
|
||||
transducer Usage:
|
||||
--tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
|
||||
--encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
|
||||
--decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
|
||||
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
|
||||
--num-threads=2 `
|
||||
--decoding-method=modified_beam_search `
|
||||
--debug=false `
|
||||
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
|
||||
./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
|
||||
-----------------------------
|
||||
";
|
||||
if (args.Length == 0)
|
||||
{
|
||||
System.Console.WriteLine("Please enter the correct parameters:");
|
||||
System.Console.WriteLine(usage);
|
||||
System.Text.StringBuilder sb = new System.Text.StringBuilder();
|
||||
//args = Console.ReadLine().Split(" ");
|
||||
while (true)
|
||||
{
|
||||
string input = Console.ReadLine();
|
||||
sb.AppendLine(input);
|
||||
if (Console.ReadKey().Key == ConsoleKey.Enter)
|
||||
break;
|
||||
}
|
||||
args = sb.ToString().Split("\r\n");
|
||||
}
|
||||
Console.WriteLine("Started!\n");
|
||||
string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
|
||||
List<string> wavFiles = new List<string>();
|
||||
Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);
|
||||
string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";
|
||||
string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";
|
||||
string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";
|
||||
string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";
|
||||
string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";
|
||||
string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";
|
||||
string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";
|
||||
string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";
|
||||
string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";
|
||||
|
||||
OfflineTransducer offlineTransducer = new OfflineTransducer();
|
||||
offlineTransducer.EncoderFilename = encoder;
|
||||
offlineTransducer.DecoderFilename = decoder;
|
||||
offlineTransducer.JoinerFilename = joiner;
|
||||
|
||||
OfflineParaformer offlineParaformer = new OfflineParaformer();
|
||||
offlineParaformer.Model = paraformer;
|
||||
|
||||
OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
|
||||
offlineNemoEncDecCtc.Model = nemo_ctc;
|
||||
|
||||
int numThreads = 0;
|
||||
int.TryParse(num_threads, out numThreads);
|
||||
bool isDebug = false;
|
||||
bool.TryParse(debug, out isDebug);
|
||||
|
||||
string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;
|
||||
|
||||
if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
|
||||
&& string.IsNullOrEmpty(paraformer)
|
||||
&& string.IsNullOrEmpty(nemo_ctc))
|
||||
{
|
||||
Console.WriteLine("Please specify at least one model");
|
||||
Console.WriteLine(usage);
|
||||
}
|
||||
// batch decode
|
||||
TimeSpan total_duration = TimeSpan.Zero;
|
||||
TimeSpan start_time = TimeSpan.Zero;
|
||||
TimeSpan end_time = TimeSpan.Zero;
|
||||
List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>();
|
||||
if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))
|
||||
{
|
||||
OnlineTransducer onlineTransducer = new OnlineTransducer();
|
||||
onlineTransducer.EncoderFilename = encoder;
|
||||
onlineTransducer.DecoderFilename = decoder;
|
||||
onlineTransducer.JoinerFilename = joiner;
|
||||
//test online
|
||||
OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
|
||||
onlineTransducer,
|
||||
tokens,
|
||||
num_threads: numThreads,
|
||||
debug: isDebug,
|
||||
decoding_method: decodingMethod);
|
||||
List<float[]> samplesList = new List<float[]>();
|
||||
foreach (string wavFile in wavFiles)
|
||||
{
|
||||
TimeSpan duration = TimeSpan.Zero;
|
||||
float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
|
||||
samplesList.Add(samples);
|
||||
total_duration += duration;
|
||||
}
|
||||
start_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
List<OnlineStream> streams = new List<OnlineStream>();
|
||||
foreach (float[] samples in samplesList)
|
||||
{
|
||||
OnlineStream stream = onlineRecognizer.CreateStream();
|
||||
onlineRecognizer.AcceptWaveForm(stream, 16000, samples);
|
||||
streams.Add(stream);
|
||||
onlineRecognizer.InputFinished(stream);
|
||||
}
|
||||
onlineRecognizer.DecodeMultipleStreams(streams);
|
||||
results = onlineRecognizer.GetResults(streams);
|
||||
foreach (OnlineRecognizerResultEntity result in results)
|
||||
{
|
||||
Console.WriteLine(result.text);
|
||||
}
|
||||
end_time = new TimeSpan(DateTime.Now.Ticks);
|
||||
}
|
||||
|
||||
|
||||
foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles))
|
||||
{
|
||||
Console.WriteLine("wavFile:{0}", item.Second);
|
||||
Console.WriteLine("text:{0}", item.First.text.ToLower());
|
||||
Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
|
||||
}
|
||||
|
||||
double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
|
||||
double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
|
||||
Console.WriteLine("num_threads:{0}", num_threads);
|
||||
Console.WriteLine("decoding_method:{0}", decodingMethod);
|
||||
Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
|
||||
Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
|
||||
Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());
|
||||
|
||||
Console.WriteLine("End!");
|
||||
}
|
||||
|
||||
/// <summary>
/// Decodes a batch of wave files with a streaming transducer model. Unlike the
/// per-file loop, all streams are obtained from the recognizer in a single
/// <c>CreateStreams</c> call. The text of every result is written to the console.
/// </summary>
/// <param name="encoder">Value assigned to the transducer's EncoderFilename.</param>
/// <param name="decoder">Value assigned to the transducer's DecoderFilename.</param>
/// <param name="joiner">Value assigned to the transducer's JoinerFilename.</param>
/// <param name="tokens">Tokens argument forwarded to the recognizer.</param>
/// <param name="numThreads">Forwarded as <c>num_threads</c>.</param>
/// <param name="isDebug">Forwarded as <c>debug</c>.</param>
/// <param name="decodingMethod">Forwarded as <c>decoding_method</c>.</param>
/// <param name="wavFiles">Wave files to load through AudioHelper.GetFileSamples.</param>
/// <param name="total_duration">Incremented by the duration reported for each file.</param>
public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration)
{
  // Describe the model and build the recognizer (test online).
  OnlineTransducer transducer = new OnlineTransducer
  {
    EncoderFilename = encoder,
    DecoderFilename = decoder,
    JoinerFilename = joiner
  };
  OnlineRecognizer<OnlineTransducer> recognizer = new OnlineRecognizer<OnlineTransducer>(
    transducer,
    tokens,
    num_threads: numThreads,
    debug: isDebug,
    decoding_method: decodingMethod);

  // Load every file up front, accumulating the audio duration as we go.
  List<float[]> allSamples = new List<float[]>();
  for (int k = 0; k < wavFiles.Count; ++k)
  {
    TimeSpan fileDuration = TimeSpan.Zero;
    allSamples.Add(AudioHelper.GetFileSamples(wavFiles[k], ref fileDuration));
    total_duration += fileDuration;
  }

  // Timestamps bracket the decoding work; they are not reported by this method.
  TimeSpan startedAt = new TimeSpan(DateTime.Now.Ticks);

  List<OnlineStream> streams = recognizer.CreateStreams(allSamples);
  recognizer.DecodeMultipleStreams(streams);

  List<OnlineRecognizerResultEntity> decoded = recognizer.GetResults(streams);
  for (int k = 0; k < decoded.Count; ++k)
  {
    Console.WriteLine(decoded[k].text);
  }

  TimeSpan finishedAt = new TimeSpan(DateTime.Now.Ticks);
}
|
||||
|
||||
/// <summary>
/// Splits raw command-line tokens into named options and wave-file paths.
/// </summary>
/// <param name="args">
/// Tokens to classify. A token containing '=' is an option ("--key=value");
/// any other non-empty token is treated as a wave file.
/// </param>
/// <param name="applicationBase">Directory that wave-file paths are combined with.</param>
/// <param name="wavFiles">Receives one combined path per wave-file token.</param>
/// <returns>
/// Option values keyed by option name. Leading/trailing '-', '`' and ' ' are
/// stripped from names and values, and '-' inside a name becomes '_'
/// (e.g. "--num-threads" is stored as "num_threads").
/// </returns>
static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
{
  char[] trimChars = { '-', '`', ' ' };
  Dictionary<string, string> argsDict = new Dictionary<string, string>();
  foreach (string input in args)
  {
    // Split on the first '=' only, so a value that itself contains '='
    // (e.g. a file name) is kept whole instead of being truncated.
    string[] ss = input.Split('=', 2);
    if (ss.Length == 1)
    {
      if (!string.IsNullOrEmpty(ss[0]))
      {
        wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(trimChars)));
      }
      continue;
    }

    string key = ss[0].Trim(trimChars).Replace("-", "_");
    // Indexer instead of Add: a repeated option overrides the earlier value
    // rather than throwing ArgumentException.
    argsDict[key] = ss[1].Trim(trimChars);
  }
  return argsDict;
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ProjectReference csharp-api
|
||||
`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />`
|
||||
The 'SherpaOnnx' project is located in ../sherpa-onnx/csharp-api.
|
||||
This C# API is cross-platform; you can compile it yourself on Windows, macOS, and Linux.
|
||||
|
||||
------------
|
||||
Alternatively, install sherpa-onnx through NuGet.
|
||||
#NuGet for sherpa-onnx
|
||||
PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
|
||||
@@ -1,67 +0,0 @@
|
||||
using NAudio.Wave;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
/// <summary>
|
||||
/// audio processing
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
/// </summary>
|
||||
public class AudioHelper
{
  /// <summary>
  /// Reads a whole audio file through NAudio's AudioFileReader and returns
  /// its bytes reinterpreted as 32-bit float samples.
  /// </summary>
  /// <param name="wavFilePath">Path of the audio file to read.</param>
  /// <param name="duration">Set to the reader's TotalTime when the file exists.</param>
  /// <returns>
  /// The samples, or a one-element array when the file does not exist
  /// (in which case <paramref name="duration"/> is left untouched).
  /// </returns>
  public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration)
  {
    if (!File.Exists(wavFilePath))
    {
      Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);
      return new float[1];
    }

    // The reader wraps an open file; dispose it so the handle is released.
    using (AudioFileReader audioFileReader = new AudioFileReader(wavFilePath))
    {
      byte[] datas = new byte[audioFileReader.Length];
      audioFileReader.Read(datas, 0, datas.Length);
      duration = audioFileReader.TotalTime;

      float[] wavdata = new float[datas.Length / sizeof(float)];
      // Copy only whole floats: copying datas.Length bytes would overrun
      // wavdata if the byte count were not a multiple of sizeof(float).
      Buffer.BlockCopy(datas, 0, wavdata, 0, wavdata.Length * sizeof(float));
      return wavdata;
    }
  }

  /// <summary>
  /// Reads an audio file in fixed-size pieces and returns one float array per piece.
  /// </summary>
  /// <param name="wavFilePath">Path of the audio file to read.</param>
  /// <param name="duration">Incremented once by the reader's TotalTime when the file exists.</param>
  /// <returns>
  /// One array per chunk. Every array has the same length; the last one is
  /// zero-padded when the file does not fill it. If the file does not exist
  /// the list holds a single one-element array.
  /// </returns>
  public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration)
  {
    List<float[]> wavdatas = new List<float[]>();
    if (!File.Exists(wavFilePath))
    {
      Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);
      wavdatas.Add(new float[1]);
      return wavdatas;
    }

    using (AudioFileReader audioFileReader = new AudioFileReader(wavFilePath))
    {
      byte[] datas = new byte[audioFileReader.Length];

      // Chunk size is measured in bytes, i.e. chunkSize / sizeof(float) samples.
      int chunkSize = 16000;
      int chunkNum = (int)Math.Ceiling((double)datas.Length / chunkSize);

      for (int i = 0; i < chunkNum; i++)
      {
        int offset = i * chunkSize;
        // Full chunk, except for the tail which holds whatever is left.
        int dataCount = Math.Min(chunkSize, datas.Length - offset);

        audioFileReader.Read(datas, offset, dataCount);

        float[] wavdata = new float[chunkSize / sizeof(float)];
        Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount);
        wavdatas.Add(wavdata);
      }

      // TotalTime is the length of the whole file, so add it once; adding it
      // inside the loop multiplied the reported duration by the chunk count.
      duration += audioFileReader.TotalTime;
    }
    return wavdatas;
  }
}
|
||||
13
dotnet-examples/.editorconfig
Normal file
13
dotnet-examples/.editorconfig
Normal file
@@ -0,0 +1,13 @@
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Don't use tabs for indentation.
|
||||
[*]
|
||||
indent_style = space
|
||||
|
||||
# Code files
|
||||
[*.{cs,csx,vb,vbx}]
|
||||
indent_size = 2
|
||||
insert_final_newline = true
|
||||
charset = utf-8-bom
|
||||
end_of_line = crlf
|
||||
2
dotnet-examples/.gitignore
vendored
Normal file
2
dotnet-examples/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
bin
|
||||
obj
|
||||
179
dotnet-examples/offline-decode-files/Program.cs
Normal file
179
dotnet-examples/offline-decode-files/Program.cs
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
//
|
||||
// This file shows how to use a non-streaming model to decode files
|
||||
// Please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
// to download non-streaming models
|
||||
using CommandLine.Text;
|
||||
using CommandLine;
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Command-line example that decodes wave files with a non-streaming model
/// (transducer, paraformer or NeMo CTC).
/// </summary>
class OfflineDecodeFiles
{
  /// <summary>Command-line options, bound by CommandLineParser.</summary>
  class Options
  {
    [Option(Required = false, HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; }

    [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")]
    public string Encoder { get; set; }

    [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")]
    public string Decoder { get; set; }

    [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")]
    public string Joiner { get; set; }

    [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
    public string Paraformer { get; set; }

    [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
    public string NeMoCtc { get; set; }

    [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
    public int NumThreads { get; set; }

    [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
    public string DecodingMethod { get; set; }

    [Option("max-active-paths", Required = false, Default = 4,
            HelpText = @"Used only when --decoding-method is modified_beam_search.
It specifies number of active paths to keep during the search")]
    public int MaxActivePaths { get; set; }

    [Option("files", Required = true, HelpText = "Audio files for decoding")]
    public IEnumerable<string> Files { get; set; }
  }

  /// <summary>Parses the arguments, then either decodes or prints the help text.</summary>
  static void Main(string[] args)
  {
    // HelpWriter is disabled so that DisplayHelp controls what is printed.
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  /// <summary>Prints usage examples followed by the generated option help.</summary>
  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    string usage = @"
# Zipformer

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
to download pre-trained non-streaming zipformer models.

# Paraformer

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
to download pre-trained paraformer models

# NeMo CTC

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
to download pre-trained NeMo CTC models
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  /// <summary>
  /// Builds a recognizer from the options, decodes every file in one batch
  /// and prints the recognized text per file.
  /// </summary>
  private static void Run(Options options)
  {
    OfflineRecognizerConfig config = new OfflineRecognizerConfig();
    config.ModelConfig.Tokens = options.Tokens;

    // The model kind is inferred from which model path was supplied.
    if (!String.IsNullOrEmpty(options.Encoder))
    {
      // this is a transducer model; it needs all three files
      if (String.IsNullOrEmpty(options.Decoder) || String.IsNullOrEmpty(options.Joiner))
      {
        Console.WriteLine("Please provide --decoder and --joiner together with --encoder");
        return;
      }

      config.ModelConfig.Transducer.Encoder = options.Encoder;
      config.ModelConfig.Transducer.Decoder = options.Decoder;
      config.ModelConfig.Transducer.Joiner = options.Joiner;
    }
    else if (!String.IsNullOrEmpty(options.Paraformer))
    {
      config.ModelConfig.Paraformer.Model = options.Paraformer;
    }
    else if (!String.IsNullOrEmpty(options.NeMoCtc))
    {
      config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
    }
    else
    {
      Console.WriteLine("Please provide a model");
      return;
    }

    // NOTE(review): --num-threads is parsed but never copied into the config
    // here. Presumably the model config has a NumThreads member, as the
    // streaming example's config does; confirm and forward options.NumThreads.
    config.DecodingMethod = options.DecodingMethod;
    config.MaxActivePaths = options.MaxActivePaths;
    config.ModelConfig.Debug = 0;

    OfflineRecognizer recognizer = new OfflineRecognizer(config);

    string[] files = options.Files.ToArray();

    // We create a separate stream for each file
    List<OfflineStream> streams = new List<OfflineStream>();
    streams.EnsureCapacity(files.Length);

    for (int i = 0; i != files.Length; ++i)
    {
      OfflineStream s = recognizer.CreateStream();

      WaveReader waveReader = new WaveReader(files[i]);
      s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
      streams.Add(s);
    }

    recognizer.Decode(streams);

    // display results
    for (int i = 0; i != files.Length; ++i)
    {
      var text = streams[i].Result.Text;
      Console.WriteLine("--------------------");
      Console.WriteLine(files[i]);
      Console.WriteLine(text);
    }
    Console.WriteLine("--------------------");
  }
}
|
||||
1
dotnet-examples/offline-decode-files/WaveReader.cs
Symbolic link
1
dotnet-examples/offline-decode-files/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
||||
../online-decode-files/WaveReader.cs
|
||||
@@ -1,20 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>sherpa_onnx</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<StartupObject>OnlineDecodeFiles</StartupObject>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="NAudio" Version="2.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>offline_decode_files</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
16
dotnet-examples/offline-decode-files/run-nemo-ctc.sh
Executable file
16
dotnet-examples/offline-decode-files/run-nemo-ctc.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the NeMo CTC example model if it is not present yet, then decodes
# its test wave files with the offline-decode-files example.

# Stop at the first failing command (so a failed clone cannot be followed by
# commands running in the wrong directory) and echo commands as they run.
set -ex

if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  # Subshell: the directory change cannot leak into the rest of the script.
  (
    cd sherpa-onnx-nemo-ctc-en-conformer-medium
    git lfs pull --include "*.onnx"
  )
fi

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
|
||||
17
dotnet-examples/offline-decode-files/run-paraformer.sh
Executable file
17
dotnet-examples/offline-decode-files/run-paraformer.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the paraformer example model if it is not present yet, then decodes
# its test wave files with the offline-decode-files example.

# Stop at the first failing command (so a failed clone cannot be followed by
# commands running in the wrong directory) and echo commands as they run.
set -ex

if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  # Subshell: the directory change cannot leak into the rest of the script.
  (
    cd sherpa-onnx-paraformer-zh-2023-03-28
    git lfs pull --include "*.onnx"
  )
fi

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  --num-threads=2 \
  --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
|
||||
19
dotnet-examples/offline-decode-files/run-zipformer.sh
Executable file
19
dotnet-examples/offline-decode-files/run-zipformer.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the zipformer example model if it is not present yet, then decodes
# its test wave files with the offline-decode-files example.

# Stop at the first failing command (so a failed clone cannot be followed by
# commands running in the wrong directory) and echo commands as they run.
set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
  # Subshell: the directory change cannot leak into the rest of the script.
  (
    cd sherpa-onnx-zipformer-en-2023-04-01
    git lfs pull --include "*.onnx"
  )
fi

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav
|
||||
181
dotnet-examples/online-decode-files/Program.cs
Normal file
181
dotnet-examples/online-decode-files/Program.cs
Normal file
@@ -0,0 +1,181 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
//
|
||||
// This file shows how to use a streaming model to decode files
|
||||
// Please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
|
||||
// to download streaming models
|
||||
|
||||
using CommandLine.Text;
|
||||
using CommandLine;
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Command-line example that decodes wave files with a streaming transducer model.
/// </summary>
class OnlineDecodeFiles
{
  /// <summary>Command-line options, bound by CommandLineParser.</summary>
  class Options
  {
    [Option(Required = true, HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; }

    [Option(Required = true, HelpText = "Path to encoder.onnx")]
    public string Encoder { get; set; }

    [Option(Required = true, HelpText = "Path to decoder.onnx")]
    public string Decoder { get; set; }

    [Option(Required = true, HelpText = "Path to joiner.onnx")]
    public string Joiner { get; set; }

    [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
    public int NumThreads { get; set; }

    [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
    public string DecodingMethod { get; set; }

    [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
    public bool Debug { get; set; }

    [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
    public int SampleRate { get; set; }

    [Option("max-active-paths", Required = false, Default = 4,
            HelpText = @"Used only when --decoding-method is modified_beam_search.
It specifies number of active paths to keep during the search")]
    public int MaxActivePaths { get; set; }

    [Option("enable-endpoint", Required = false, Default = false,
            HelpText = "True to enable endpoint detection.")]
    public bool EnableEndpoint { get; set; }

    [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
            HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
    public float Rule1MinTrailingSilence { get; set; }

    [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F,
            HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value after something that is not blank has been decoded. Used
only when --enable-endpoint is true.")]
    public float Rule2MinTrailingSilence { get; set; }

    [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
            HelpText = @"An endpoint is detected if the utterance in seconds is
larger than this value. Used only when --enable-endpoint is true.")]
    public float Rule3MinUtteranceLength { get; set; }

    [Option("files", Required = true, HelpText = "Audio files for decoding")]
    public IEnumerable<string> Files { get; set; }
  }

  /// <summary>Parses the arguments, then either decodes or prints the help text.</summary>
  static void Main(string[] args)
  {
    // HelpWriter is disabled so that DisplayHelp controls what is printed.
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  /// <summary>Prints a usage example followed by the generated option help.</summary>
  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    // The wave files are passed through the required --files option.
    string usage = @"
dotnet run \
  --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --debug=false \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
to download pre-trained streaming models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  /// <summary>
  /// Builds a recognizer from the options, feeds every file into its own
  /// stream, decodes until no stream is ready and prints the text per file.
  /// </summary>
  private static void Run(Options options)
  {
    OnlineRecognizerConfig config = new OnlineRecognizerConfig();
    config.FeatConfig.SampleRate = options.SampleRate;

    // All models from icefall using feature dim 80.
    // You can change it if your model has a different feature dim.
    config.FeatConfig.FeatureDim = 80;

    config.TransducerModelConfig.Encoder = options.Encoder;
    config.TransducerModelConfig.Decoder = options.Decoder;
    config.TransducerModelConfig.Joiner = options.Joiner;
    config.TransducerModelConfig.Tokens = options.Tokens;
    config.TransducerModelConfig.NumThreads = options.NumThreads;
    config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;

    config.DecodingMethod = options.DecodingMethod;
    config.MaxActivePaths = options.MaxActivePaths;
    config.EnableEndpoint = options.EnableEndpoint ? 1 : 0;

    config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
    config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
    config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;

    OnlineRecognizer recognizer = new OnlineRecognizer(config);

    string[] files = options.Files.ToArray();

    // We create a separate stream for each file
    List<OnlineStream> streams = new List<OnlineStream>();
    streams.EnsureCapacity(files.Length);

    for (int i = 0; i != files.Length; ++i)
    {
      OnlineStream s = recognizer.CreateStream();

      WaveReader waveReader = new WaveReader(files[i]);
      s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

      // Append 0.3 seconds of zeros after the audio before finishing the input.
      float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
      s.AcceptWaveform(waveReader.SampleRate, tailPadding);
      s.InputFinished();

      streams.Add(s);
    }

    while (true)
    {
      // Materialize the query: a lazy Where() would call IsReady again each
      // time it is enumerated, so the emptiness check and the decode call
      // could see different sets of streams.
      List<OnlineStream> readyStreams = streams.Where(s => recognizer.IsReady(s)).ToList();
      if (readyStreams.Count == 0)
      {
        break;
      }

      recognizer.Decode(readyStreams);
    }

    // display results
    for (int i = 0; i != files.Length; ++i)
    {
      var text = recognizer.GetResult(streams[i]).Text;
      Console.WriteLine("--------------------");
      Console.WriteLine(files[i]);
      Console.WriteLine(text);
    }
    Console.WriteLine("--------------------");
  }
}
|
||||
174
dotnet-examples/online-decode-files/WaveReader.cs
Normal file
174
dotnet-examples/online-decode-files/WaveReader.cs
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
{

  /// <summary>
  /// The first 44 bytes of a WAVE file, laid out field by field so the raw
  /// bytes can be marshalled straight into this struct.
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct WaveHeader
  {
    public Int32 ChunkID;
    public Int32 ChunkSize;
    public Int32 Format;
    public Int32 SubChunk1ID;
    public Int32 SubChunk1Size;
    public Int16 AudioFormat;
    public Int16 NumChannels;
    public Int32 SampleRate;
    public Int32 ByteRate;
    public Int16 BlockAlign;
    public Int16 BitsPerSample;
    public Int32 SubChunk2ID;
    public Int32 SubChunk2Size;

    /// <summary>
    /// Checks that the header describes a 16-bit, single-channel file with
    /// AudioFormat 1. The first failed check is reported on the console.
    /// </summary>
    /// <returns>True when every check passes, false otherwise.</returns>
    public bool Validate()
    {
      //                 F F I R
      if (ChunkID != 0x46464952)
      {
        Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
        return false;
      }

      //                E V A W
      if (Format != 0x45564157)
      {
        Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
        return false;
      }

      //                      t m f
      if (SubChunk1ID != 0x20746d66)
      {
        Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
        return false;
      }

      if (SubChunk1Size != 16)
      {
        Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
        return false;
      }

      if (AudioFormat != 1)
      {
        Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
        return false;
      }

      if (NumChannels != 1)
      {
        Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
        return false;
      }

      if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
      {
        Console.WriteLine($"Invalid byte rate: {ByteRate}.");
        return false;
      }

      if (BlockAlign != (NumChannels * BitsPerSample / 8))
      {
        // Report the offending field (this used to print ByteRate).
        Console.WriteLine($"Invalid block align: {BlockAlign}.");
        return false;
      }

      if (BitsPerSample != 16)
      { // we support only 16 bits per sample
        Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
        return false;
      }

      return true;
    }
  }

  // It supports only 16-bit, single channel WAVE format.
  // The sample rate can be any value.
  public class WaveReader
  {
    /// <summary>
    /// Reads the given wave file completely into memory.
    /// </summary>
    /// <param name="fileName">Path of the wave file.</param>
    /// <exception cref="ApplicationException">
    /// The file does not exist, is too short to hold a header, fails
    /// validation, or has no data chunk.
    /// </exception>
    public WaveReader(String fileName)
    {
      if (!File.Exists(fileName))
      {
        throw new ApplicationException($"{fileName} does not exist!");
      }

      // OpenRead asks for read access only, so read-only files can be opened;
      // File.Open(path, FileMode.Open) would also request write access.
      using (var stream = File.OpenRead(fileName))
      using (var reader = new BinaryReader(stream))
      {
        _header = ReadHeader(reader, fileName);

        if (!_header.Validate())
        {
          throw new ApplicationException($"Invalid wave file {fileName}");
        }

        SkipMetaData(reader, fileName);

        // now read samples
        // _header.SubChunk2Size contains number of bytes in total.
        // we assume each sample is of type int16
        if (_header.SubChunk2Size < 0)
        {
          throw new ApplicationException($"Invalid data size {_header.SubChunk2Size} in {fileName}");
        }

        // Never ask for more than the file still holds, so a bogus size in
        // the header cannot trigger a huge allocation.
        long remaining = stream.Length - stream.Position;
        int numBytes = (int)Math.Min((long)_header.SubChunk2Size, remaining);
        byte[] buffer = reader.ReadBytes(numBytes);

        // Size everything by what was actually read and ignore a dangling
        // odd byte; otherwise BlockCopy would overrun the destination.
        int numSamples = buffer.Length / 2;
        short[] samples_int16 = new short[numSamples];
        Buffer.BlockCopy(buffer, 0, samples_int16, 0, numSamples * 2);

        _samples = new float[numSamples];

        for (var i = 0; i < numSamples; ++i)
        {
          _samples[i] = samples_int16[i] / 32768.0F;
        }
      }
    }

    // Reads sizeof(WaveHeader) bytes from the reader and marshals them into a WaveHeader.
    private static WaveHeader ReadHeader(BinaryReader reader, String fileName)
    {
      int headerSize = Marshal.SizeOf(typeof(WaveHeader));
      byte[] bytes = reader.ReadBytes(headerSize);

      // ReadBytes returns fewer bytes at end of file; marshalling a short
      // buffer would read past its end.
      if (bytes.Length != headerSize)
      {
        throw new ApplicationException(
          $"Invalid wave file {fileName}: header has {bytes.Length} bytes. Expect {headerSize}");
      }

      GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
      try
      {
        return (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
      }
      finally
      {
        // Always unpin, even if marshalling throws.
        handle.Free();
      }
    }

    // Skips chunks until the one whose ID is "data" and stores its ID and
    // size in _header. On return the stream is positioned at the first sample.
    private void SkipMetaData(BinaryReader reader, String fileName)
    {
      var bs = reader.BaseStream;

      Int32 subChunk2ID = _header.SubChunk2ID;
      Int32 subChunk2Size = _header.SubChunk2Size;

      //                      a t a d
      while (subChunk2ID != 0x61746164)
      {
        // The current chunk body plus the next 8-byte chunk header must fit
        // in the file; otherwise there is no data chunk to find.
        if (subChunk2Size < 0 || bs.Position + subChunk2Size + 8 > bs.Length)
        {
          throw new ApplicationException($"Invalid wave file {fileName}: no data chunk found");
        }

        bs.Seek(subChunk2Size, SeekOrigin.Current);
        subChunk2ID = reader.ReadInt32();
        subChunk2Size = reader.ReadInt32();
      }

      _header.SubChunk2ID = subChunk2ID;
      _header.SubChunk2Size = subChunk2Size;
    }

    private WaveHeader _header;

    // Samples are normalized to the range [-1, 1]
    private float[] _samples;

    public int SampleRate => _header.SampleRate;
    public float[] Samples => _samples;

    /// <summary>Loads a file and prints its sample count and sample rate.</summary>
    public static void Test(String fileName)
    {
      WaveReader reader = new WaveReader(fileName);
      Console.WriteLine($"samples length: {reader.Samples.Length}");
      Console.WriteLine($"samples rate: {reader.SampleRate}");
    }
  }

}
|
||||
@@ -0,0 +1,16 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>online_decode_files</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="CommandLineParser" Version="2.9.1" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
21
dotnet-examples/online-decode-files/run.sh
Executable file
21
dotnet-examples/online-decode-files/run.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

# Stop at the first failing command, e.g., when git clone fails, instead of
# running the following commands in the wrong directory.
set -e

model_dir=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

if [ ! -d "$model_dir" ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  pushd "$model_dir"
  git lfs pull --include "*.onnx"
  popd
fi

dotnet run -c Release \
  --tokens "$model_dir/tokens.txt" \
  --encoder "$model_dir/encoder-epoch-99-avg-1.int8.onnx" \
  --decoder "$model_dir/decoder-epoch-99-avg-1.int8.onnx" \
  --joiner "$model_dir/joiner-epoch-99-avg-1.int8.onnx" \
  --decoding-method greedy_search \
  --files "$model_dir/test_wavs/1.wav" \
  "$model_dir/test_wavs/0.wav"
|
||||
28
dotnet-examples/sherpa-onnx.sln
Normal file
28
dotnet-examples/sherpa-onnx.sln
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.0.31903.59
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
5
scripts/dotnet/.gitignore
vendored
Normal file
5
scripts/dotnet/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
all
|
||||
macos
|
||||
linux
|
||||
windows
|
||||
packages
|
||||
17
scripts/dotnet/README.md
Normal file
17
scripts/dotnet/README.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# Introduction
|
||||
|
||||
[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source
|
||||
real-time speech recognition toolkit developed
|
||||
by the Next-gen Kaldi team.
|
||||
|
||||
It supports streaming recognition on a variety of
|
||||
platforms such as Android, iOS, Raspberry Pi, Linux, Windows, macOS, etc.
|
||||
|
||||
It does not require an Internet connection during recognition.
|
||||
|
||||
See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
|
||||
for details.
|
||||
|
||||
Please see
|
||||
https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples
|
||||
for how to use the C# APIs of this package.
|
||||
118
scripts/dotnet/generate.py
Executable file
118
scripts/dotnet/generate.py
Executable file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import jinja2
|
||||
|
||||
SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
|
||||
|
||||
def get_version():
    """Return the version declared in the top-level CMakeLists.txt.

    The version is taken from the first ``set(SHERPA_ONNX_VERSION ...)``
    statement; surrounding whitespace and double quotes are removed.

    Raises:
      ValueError: If CMakeLists.txt contains no such statement.
    """
    cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt"
    with open(cmake_file) as f:
        content = f.read()

    # Non-greedy so that a ")" appearing later on the same line (e.g., in a
    # trailing comment) does not become part of the version.
    match = re.search(r"set\(SHERPA_ONNX_VERSION (.*?)\)", content)
    if match is None:
        raise ValueError(f"Cannot find SHERPA_ONNX_VERSION in {cmake_file}")

    return match.group(1).strip().strip('"')
|
||||
|
||||
|
||||
def read_proj_file(filename):
    """Return the whole content of the text file ``filename`` as a string."""
    return Path(filename).read_text()
|
||||
|
||||
|
||||
def get_dict():
    """Return the template variables shared by all generated project files.

    Currently the dict contains only ``version``, as returned by
    get_version(). Callers add their own keys before rendering a template.
    """
    # get_version() reads and parses CMakeLists.txt, so call it only once.
    return {
        "version": get_version(),
    }
|
||||
|
||||
|
||||
def process_linux(s):
    """Render the jinja2 template text ``s`` for linux-x64.

    The rendered text is written to ./linux/sherpa-onnx.runtime.csproj,
    relative to the current working directory.
    """
    lib_dir = f"{SHERPA_ONNX_DIR}/linux/sherpa_onnx/lib/"
    lib_names = (
        "libkaldi-native-fbank-core.so",
        "libonnxruntime.so.1.14.0",
        "libsherpa-onnx-c-api.so",
        "libsherpa-onnx-core.so",
    )

    context = get_dict()
    context["dotnet_rid"] = "linux-x64"
    # One absolute path per line, separated by ";"
    context["libs"] = "\n ;".join(lib_dir + name for name in lib_names)

    rendered = jinja2.Environment().from_string(s).render(**context)
    with open("./linux/sherpa-onnx.runtime.csproj", "w") as out:
        out.write(rendered)
|
||||
|
||||
|
||||
def process_macos(s):
    """Render the jinja2 template text ``s`` for osx-x64.

    The rendered text is written to ./macos/sherpa-onnx.runtime.csproj,
    relative to the current working directory.
    """
    lib_dir = f"{SHERPA_ONNX_DIR}/macos/sherpa_onnx/lib/"
    lib_names = (
        "libkaldi-native-fbank-core.dylib",
        "libonnxruntime.1.14.0.dylib",
        "libsherpa-onnx-c-api.dylib",
        "libsherpa-onnx-core.dylib",
    )

    context = get_dict()
    context["dotnet_rid"] = "osx-x64"
    # One absolute path per line, separated by ";"
    context["libs"] = "\n ;".join(lib_dir + name for name in lib_names)

    rendered = jinja2.Environment().from_string(s).render(**context)
    with open("./macos/sherpa-onnx.runtime.csproj", "w") as out:
        out.write(rendered)
|
||||
|
||||
|
||||
def process_windows(s):
    """Render the jinja2 template text ``s`` for win-x64.

    The rendered text is written to ./windows/sherpa-onnx.runtime.csproj,
    relative to the current working directory.
    """
    lib_dir = f"{SHERPA_ONNX_DIR}/windows/sherpa_onnx/lib/"
    lib_names = (
        "kaldi-native-fbank-core.dll",
        "onnxruntime.dll",
        "sherpa-onnx-c-api.dll",
        "sherpa-onnx-core.dll",
    )

    context = get_dict()
    context["dotnet_rid"] = "win-x64"
    # One absolute path per line, separated by ";"
    context["libs"] = "\n ;".join(lib_dir + name for name in lib_names)

    rendered = jinja2.Environment().from_string(s).render(**context)
    with open("./windows/sherpa-onnx.runtime.csproj", "w") as out:
        out.write(rendered)
|
||||
|
||||
|
||||
def main():
    """Generate the three runtime project files and ./all/sherpa-onnx.csproj.

    All input and output paths are relative to the current working directory.
    """
    runtime_template = read_proj_file("./sherpa-onnx.csproj.runtime.in")
    for process in (process_macos, process_linux, process_windows):
        process(runtime_template)

    context = get_dict()
    context["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages")

    template = jinja2.Environment().from_string(
        read_proj_file("./sherpa-onnx.csproj.in")
    )
    with open("./all/sherpa-onnx.csproj", "w") as out:
        out.write(template.render(**context))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
259
scripts/dotnet/offline.cs
Normal file
259
scripts/dotnet/offline.cs
Normal file
@@ -0,0 +1,259 @@
|
||||
/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
|
||||
using System.Linq;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
using System;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
/// Holds three string fields: Encoder, Decoder and Joiner.
///
/// The struct uses sequential layout and every string is marshalled as
/// UnmanagedType.LPStr, so the declaration order of the fields must not
/// be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineTransducerModelConfig
{
  [MarshalAs(UnmanagedType.LPStr)]
  public string Encoder;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Decoder;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Joiner;

  /// Set every field to the empty string.
  public OfflineTransducerModelConfig()
  {
    Encoder = string.Empty;
    Decoder = string.Empty;
    Joiner = string.Empty;
  }
}
|
||||
|
||||
/// Holds a single string field, Model, marshalled as UnmanagedType.LPStr.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineParaformerModelConfig
{
  [MarshalAs(UnmanagedType.LPStr)]
  public string Model;

  /// Set Model to the empty string.
  public OfflineParaformerModelConfig()
  {
    Model = string.Empty;
  }
}
|
||||
|
||||
/// Holds a single string field, Model, marshalled as UnmanagedType.LPStr.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineNemoEncDecCtcModelConfig
{
  [MarshalAs(UnmanagedType.LPStr)]
  public string Model;

  /// Set Model to the empty string.
  public OfflineNemoEncDecCtcModelConfig()
  {
    Model = string.Empty;
  }
}
|
||||
|
||||
/// Holds a string field Model (marshalled as UnmanagedType.LPStr) followed
/// by a float field Scale.
///
/// The struct uses sequential layout, so the declaration order of the
/// fields must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineLMConfig
{
  [MarshalAs(UnmanagedType.LPStr)]
  public string Model;

  public float Scale;

  /// Defaults: Model is empty and Scale is 0.5.
  public OfflineLMConfig()
  {
    Model = string.Empty;
    Scale = 0.5F;
  }
}
|
||||
|
||||
/// Groups the three per-model-type configs with Tokens, NumThreads and
/// Debug.
///
/// The struct uses sequential layout, so the declaration order of the
/// fields must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineModelConfig
{
  public OfflineTransducerModelConfig Transducer;
  public OfflineParaformerModelConfig Paraformer;
  public OfflineNemoEncDecCtcModelConfig NeMoCtc;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Tokens;

  public int NumThreads;

  public int Debug;

  /// Defaults: default-constructed nested configs, empty Tokens,
  /// NumThreads = 1 and Debug = 0.
  public OfflineModelConfig()
  {
    Transducer = new OfflineTransducerModelConfig();
    Paraformer = new OfflineParaformerModelConfig();
    NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
    Tokens = string.Empty;
    NumThreads = 1;
    Debug = 0;
  }
}
|
||||
|
||||
/// The config passed by reference to CreateOfflineRecognizer.
///
/// The struct uses sequential layout, so the declaration order of the
/// fields must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineRecognizerConfig
{
  public FeatureConfig FeatConfig;
  public OfflineModelConfig ModelConfig;
  public OfflineLMConfig LmConfig;

  [MarshalAs(UnmanagedType.LPStr)]
  public string DecodingMethod;

  public int MaxActivePaths;

  /// Defaults: default-constructed nested configs,
  /// DecodingMethod = "greedy_search" and MaxActivePaths = 4.
  public OfflineRecognizerConfig()
  {
    FeatConfig = new FeatureConfig();
    ModelConfig = new OfflineModelConfig();
    LmConfig = new OfflineLMConfig();

    DecodingMethod = "greedy_search";
    MaxActivePaths = 4;
  }
}
|
||||
|
||||
/// Managed copy of a native recognition result.
///
/// The text is copied in the constructor, so the instance stays valid
/// after the native result has been destroyed.
public class OfflineRecognizerResult
{
  // Mirrors the part of the native result read here: a pointer to the text.
  [StructLayout(LayoutKind.Sequential)]
  struct Impl
  {
    public IntPtr Text;
  }

  private String _text;

  /// handle points to a native result whose first member is a pointer to
  /// a null-terminated UTF-8 string.
  public OfflineRecognizerResult(IntPtr handle)
  {
    Impl native = Marshal.PtrToStructure<Impl>(handle);
    _text = Marshal.PtrToStringUTF8(native.Text);
  }

  /// The recognized text.
  public String Text => _text;
}
|
||||
|
||||
/// Wraps a native offline stream handle.
///
/// The native stream is destroyed by Dispose() or by the finalizer,
/// whichever runs first. Dispose() may be called more than once.
public class OfflineStream : IDisposable
{
  /// p is a native stream handle, e.g., the one returned by
  /// CreateOfflineStream. This object takes ownership of it.
  public OfflineStream(IntPtr p)
  {
    _handle = new HandleRef(this, p);
  }

  /// Pass all entries of samples, together with sampleRate, to the
  /// native stream.
  public void AcceptWaveform(int sampleRate, float[] samples)
  {
    AcceptWaveform(Handle, sampleRate, samples, samples.Length);
  }

  /// Fetch the result of this stream.
  /// The text is copied into managed memory before the native result
  /// is destroyed.
  public OfflineRecognizerResult Result
  {
    get
    {
      IntPtr h = GetResult(Handle);
      try
      {
        return new OfflineRecognizerResult(h);
      }
      finally
      {
        // Release the native result even if copying it throws.
        DestroyResult(h);
      }
    }
  }

  ~OfflineStream()
  {
    Cleanup();
  }

  public void Dispose()
  {
    Cleanup();
    // Prevent the object from being placed on the
    // finalization queue
    System.GC.SuppressFinalize(this);
  }

  private void Cleanup()
  {
    // Nothing to release, e.g., when Dispose() is called a second time.
    if (Handle == IntPtr.Zero)
    {
      return;
    }

    DestroyOfflineStream(Handle);

    // Don't permit the handle to be used again.
    _handle = new HandleRef(this, IntPtr.Zero);
  }

  private HandleRef _handle;

  /// The native handle; IntPtr.Zero after the stream has been disposed.
  public IntPtr Handle => _handle.Handle;

  [DllImport(Dll.Filename)]
  private static extern void DestroyOfflineStream(IntPtr handle);

  [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")]
  private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

  [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")]
  private static extern IntPtr GetResult(IntPtr handle);

  [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
  private static extern void DestroyResult(IntPtr handle);
}
|
||||
|
||||
/// Wraps a native offline recognizer handle.
///
/// The native recognizer is destroyed by Dispose() or by the finalizer,
/// whichever runs first. Dispose() may be called more than once.
public class OfflineRecognizer : IDisposable
{
  /// Create a native recognizer from config.
  public OfflineRecognizer(OfflineRecognizerConfig config)
  {
    IntPtr h = CreateOfflineRecognizer(ref config);
    _handle = new HandleRef(this, h);
  }

  /// Create a new stream that belongs to this recognizer.
  public OfflineStream CreateStream()
  {
    IntPtr p = CreateOfflineStream(_handle.Handle);
    return new OfflineStream(p);
  }

  /// Decode a single stream.
  public void Decode(OfflineStream stream)
  {
    Decode(_handle.Handle, stream.Handle);
  }

  /// Decode several streams with a single native call.
  public void Decode(IEnumerable<OfflineStream> streams)
  {
    IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
    Decode(_handle.Handle, ptrs, ptrs.Length);
  }

  public void Dispose()
  {
    Cleanup();
    // Prevent the object from being placed on the
    // finalization queue
    System.GC.SuppressFinalize(this);
  }

  ~OfflineRecognizer()
  {
    Cleanup();
  }

  private void Cleanup()
  {
    // Nothing to release, e.g., when Dispose() is called a second time.
    if (_handle.Handle == IntPtr.Zero)
    {
      return;
    }

    DestroyOfflineRecognizer(_handle.Handle);

    // Don't permit the handle to be used again.
    _handle = new HandleRef(this, IntPtr.Zero);
  }

  private HandleRef _handle;

  [DllImport(Dll.Filename)]
  private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config);

  [DllImport(Dll.Filename)]
  private static extern void DestroyOfflineRecognizer(IntPtr handle);

  [DllImport(Dll.Filename)]
  private static extern IntPtr CreateOfflineStream(IntPtr handle);

  [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")]
  private static extern void Decode(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
  private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
}
|
||||
|
||||
}
|
||||
291
scripts/dotnet/online.cs
Normal file
291
scripts/dotnet/online.cs
Normal file
@@ -0,0 +1,291 @@
|
||||
/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
|
||||
using System.Linq;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.InteropServices;
|
||||
using System;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
/// Name of the native library used by every [DllImport] in this assembly.
internal static class Dll
{
  // No prefix or extension is given here; the value is passed as-is to
  // DllImport.
  public const string Filename = "sherpa-onnx-c-api";
}
|
||||
|
||||
/// Holds the Encoder, Decoder, Joiner and Tokens strings together with
/// NumThreads and Debug.
///
/// The struct uses sequential layout and every string is marshalled as
/// UnmanagedType.LPStr, so the declaration order of the fields must not
/// be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OnlineTransducerModelConfig
{
  [MarshalAs(UnmanagedType.LPStr)]
  public string Encoder;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Decoder;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Joiner;

  [MarshalAs(UnmanagedType.LPStr)]
  public string Tokens;

  /// Number of threads used to run the neural network model
  public int NumThreads;

  /// true to print debug information of the model
  public int Debug;

  /// Defaults: all strings are empty, NumThreads = 1 and Debug = 0.
  public OnlineTransducerModelConfig()
  {
    Encoder = string.Empty;
    Decoder = string.Empty;
    Joiner = string.Empty;
    Tokens = string.Empty;
    NumThreads = 1;
    Debug = 0;
  }
}
|
||||
|
||||
/// It expects 16 kHz 16-bit single channel wave format.
///
/// The struct uses sequential layout, so the declaration order of the
/// fields must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct FeatureConfig
{
  /// Sample rate of the input data. MUST match the one expected
  /// by the model. For instance, it should be 16000 for models provided
  /// by us.
  public int SampleRate;

  /// Feature dimension of the model.
  /// For instance, it should be 80 for models provided by us.
  public int FeatureDim;

  /// Defaults: SampleRate = 16000 and FeatureDim = 80.
  public FeatureConfig()
  {
    SampleRate = 16000;
    FeatureDim = 80;
  }
}
|
||||
|
||||
/// The config passed by reference to CreateOnlineRecognizer.
///
/// The struct uses sequential layout, so the declaration order of the
/// fields must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OnlineRecognizerConfig
{
  public FeatureConfig FeatConfig;
  public OnlineTransducerModelConfig TransducerModelConfig;

  [MarshalAs(UnmanagedType.LPStr)]
  public string DecodingMethod;

  /// Used only when decoding_method is modified_beam_search
  /// Example value: 4
  public int MaxActivePaths;

  /// 0 to disable endpoint detection.
  /// A non-zero value to enable endpoint detection.
  public int EnableEndpoint;

  /// An endpoint is detected if trailing silence in seconds is larger than
  /// this value even if nothing has been decoded.
  /// Used only when enable_endpoint is not 0.
  public float Rule1MinTrailingSilence;

  /// An endpoint is detected if trailing silence in seconds is larger than
  /// this value after something that is not blank has been decoded.
  /// Used only when enable_endpoint is not 0.
  public float Rule2MinTrailingSilence;

  /// An endpoint is detected if the utterance in seconds is larger than
  /// this value.
  /// Used only when enable_endpoint is not 0.
  public float Rule3MinUtteranceLength;

  /// Defaults: default-constructed nested configs, greedy search with
  /// MaxActivePaths = 4, endpoint detection disabled, and the rule
  /// values 1.2, 2.4 and 20.
  public OnlineRecognizerConfig()
  {
    FeatConfig = new FeatureConfig();
    TransducerModelConfig = new OnlineTransducerModelConfig();

    DecodingMethod = "greedy_search";
    MaxActivePaths = 4;

    EnableEndpoint = 0;
    Rule1MinTrailingSilence = 1.2F;
    Rule2MinTrailingSilence = 2.4F;
    Rule3MinUtteranceLength = 20.0F;
  }
}
|
||||
|
||||
/// Managed copy of a native recognition result.
///
/// The text is copied in the constructor, so the instance stays valid
/// after the native result has been destroyed.
public class OnlineRecognizerResult
{
  // Mirrors the part of the native result read here: a pointer to the text.
  [StructLayout(LayoutKind.Sequential)]
  struct Impl
  {
    public IntPtr Text;
  }

  private String _text;

  /// handle points to a native result whose first member is a pointer to
  /// a null-terminated UTF-8 string.
  public OnlineRecognizerResult(IntPtr handle)
  {
    Impl native = Marshal.PtrToStructure<Impl>(handle);
    _text = Marshal.PtrToStringUTF8(native.Text);
  }

  /// The recognized text.
  public String Text => _text;
}
|
||||
|
||||
/// Wraps a native online stream handle.
///
/// The native stream is destroyed by Dispose() or by the finalizer,
/// whichever runs first. Dispose() may be called more than once.
public class OnlineStream : IDisposable
{
  /// p is a native stream handle, e.g., the one returned by
  /// CreateOnlineStream. This object takes ownership of it.
  public OnlineStream(IntPtr p)
  {
    _handle = new HandleRef(this, p);
  }

  /// Pass all entries of samples, together with sampleRate, to the
  /// native stream.
  public void AcceptWaveform(int sampleRate, float[] samples)
  {
    AcceptWaveform(Handle, sampleRate, samples, samples.Length);
  }

  /// Forward to the native InputFinished() function for this stream.
  public void InputFinished()
  {
    InputFinished(Handle);
  }

  ~OnlineStream()
  {
    Cleanup();
  }

  public void Dispose()
  {
    Cleanup();
    // Prevent the object from being placed on the
    // finalization queue
    System.GC.SuppressFinalize(this);
  }

  private void Cleanup()
  {
    // Nothing to release, e.g., when Dispose() is called a second time.
    if (Handle == IntPtr.Zero)
    {
      return;
    }

    DestroyOnlineStream(Handle);

    // Don't permit the handle to be used again.
    _handle = new HandleRef(this, IntPtr.Zero);
  }

  private HandleRef _handle;

  /// The native handle; IntPtr.Zero after the stream has been disposed.
  public IntPtr Handle => _handle.Handle;

  [DllImport(Dll.Filename)]
  private static extern void DestroyOnlineStream(IntPtr handle);

  [DllImport(Dll.Filename)]
  private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

  [DllImport(Dll.Filename)]
  private static extern void InputFinished(IntPtr handle);
}
|
||||
|
||||
// please see
// https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
// https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
//
// Wraps a native online recognizer handle. The native recognizer is
// destroyed by Dispose() or by the finalizer, whichever runs first.
// Dispose() may be called more than once.
public class OnlineRecognizer : IDisposable
{
  /// Create a native recognizer from config.
  public OnlineRecognizer(OnlineRecognizerConfig config)
  {
    IntPtr h = CreateOnlineRecognizer(ref config);
    _handle = new HandleRef(this, h);
  }

  /// Create a new stream that belongs to this recognizer.
  public OnlineStream CreateStream()
  {
    IntPtr p = CreateOnlineStream(_handle.Handle);
    return new OnlineStream(p);
  }

  /// Return true if the passed stream is ready for decoding.
  public bool IsReady(OnlineStream stream)
  {
    return IsReady(_handle.Handle, stream.Handle) != 0;
  }

  /// Return true if an endpoint is detected for this stream.
  /// You probably need to invoke Reset(stream) when this method returns
  /// true.
  public bool IsEndpoint(OnlineStream stream)
  {
    return IsEndpoint(_handle.Handle, stream.Handle) != 0;
  }

  /// You have to ensure that IsReady(stream) returns true before
  /// you call this method
  public void Decode(OnlineStream stream)
  {
    Decode(_handle.Handle, stream.Handle);
  }

  /// Decode several streams with a single native call.
  /// The caller should ensure all passed streams are ready for decoding.
  public void Decode(IEnumerable<OnlineStream> streams)
  {
    IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
    Decode(_handle.Handle, ptrs, ptrs.Length);
  }

  /// Fetch the current result of stream.
  /// The text is copied into managed memory before the native result
  /// is destroyed.
  public OnlineRecognizerResult GetResult(OnlineStream stream)
  {
    IntPtr h = GetResult(_handle.Handle, stream.Handle);
    try
    {
      return new OnlineRecognizerResult(h);
    }
    finally
    {
      // Release the native result even if copying it throws.
      DestroyResult(h);
    }
  }

  /// When this method returns, IsEndpoint(stream) will return false.
  public void Reset(OnlineStream stream)
  {
    Reset(_handle.Handle, stream.Handle);
  }

  public void Dispose()
  {
    Cleanup();
    // Prevent the object from being placed on the
    // finalization queue
    System.GC.SuppressFinalize(this);
  }

  ~OnlineRecognizer()
  {
    Cleanup();
  }

  private void Cleanup()
  {
    // Nothing to release, e.g., when Dispose() is called a second time.
    if (_handle.Handle == IntPtr.Zero)
    {
      return;
    }

    DestroyOnlineRecognizer(_handle.Handle);

    // Don't permit the handle to be used again.
    _handle = new HandleRef(this, IntPtr.Zero);
  }

  private HandleRef _handle;

  [DllImport(Dll.Filename)]
  private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config);

  [DllImport(Dll.Filename)]
  private static extern void DestroyOnlineRecognizer(IntPtr handle);

  [DllImport(Dll.Filename)]
  private static extern IntPtr CreateOnlineStream(IntPtr handle);

  [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")]
  private static extern int IsReady(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")]
  private static extern void Decode(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")]
  private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

  [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")]
  private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")]
  private static extern void DestroyResult(IntPtr result);

  [DllImport(Dll.Filename)]
  private static extern void Reset(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename)]
  private static extern int IsEndpoint(IntPtr handle, IntPtr stream);
}
|
||||
}
|
||||
33
scripts/dotnet/run.sh
Executable file
33
scripts/dotnet/run.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
# Copyright (c) 2023 Xiaomi Corporation
#
# Generate the project files with ./generate.py, then build and pack every
# project. All packages are placed in ./packages

set -ex

mkdir -p macos linux windows all

cp ./online.cs all
cp ./offline.cs all

./generate.py

# "all" comes last, after the three runtime projects.
for d in linux macos windows all; do
  pushd $d
  dotnet build -c Release
  dotnet pack -c Release -o ../packages
  popd
done

ls -lh packages
|
||||
56
scripts/dotnet/sherpa-onnx.csproj.in
Normal file
56
scripts/dotnet/sherpa-onnx.csproj.in
Normal file
@@ -0,0 +1,56 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
<OutputType>Library</OutputType>
|
||||
<LangVersion>10.0</LangVersion>
|
||||
<TargetFrameworks>netstandard2.1;netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
|
||||
<RuntimeIdentifiers>linux-x64;osx-x64;win-x64</RuntimeIdentifiers>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
<AssemblyName>sherpa-onnx</AssemblyName>
|
||||
<Version>{{ version }}</Version>
|
||||
|
||||
<PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
|
||||
<PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
|
||||
privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
|
||||
|
||||
<Authors>The Next-gen Kaldi development team</Authors>
|
||||
<Owners>The Next-gen Kaldi development team</Owners>
|
||||
<Company>Xiaomi Corporation</Company>
|
||||
<Copyright>Copyright 2019-2023 Xiaomi Corporation</Copyright>
|
||||
<Description>sherpa-onnx is an open-source real-time speech recognition toolkit developed
|
||||
by the Next-gen Kaldi team. It supports streaming recognition on a variety of
|
||||
platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc.
|
||||
|
||||
It does not require Internet connection during recognition.
|
||||
|
||||
See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
|
||||
for details.
|
||||
</Description>
|
||||
|
||||
<!-- Pack Option -->
|
||||
<Title>sherpa-onnx v{{ version }}</Title>
|
||||
<PackageId>org.k2fsa.sherpa.onnx</PackageId>
|
||||
|
||||
<!-- Signing -->
|
||||
<SignAssembly>false</SignAssembly>
|
||||
<PublicSign>false</PublicSign>
|
||||
<DelaySign>false</DelaySign>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
<RestoreSources>{{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Include="../README.md" Pack="true" PackagePath="/"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-x64" Version="{{ version }}" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-x64" Version="{{ version }}" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x64" Version="{{ version }}" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
50
scripts/dotnet/sherpa-onnx.csproj.runtime.in
Normal file
50
scripts/dotnet/sherpa-onnx.csproj.runtime.in
Normal file
@@ -0,0 +1,50 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
<OutputType>Library</OutputType>
|
||||
<TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0</TargetFrameworks>
|
||||
<RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
|
||||
<AssemblyName>sherpa-onnx</AssemblyName>
|
||||
<Version>{{ version }}</Version>
|
||||
|
||||
<PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
|
||||
<PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
|
||||
privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
|
||||
|
||||
<!-- Nuget Properties -->
|
||||
<Description>.NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project.
|
||||
|
||||
In general, you don't need to use this package directly.
|
||||
|
||||
Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead
|
||||
</Description>
|
||||
<IncludeBuildOutput>false</IncludeBuildOutput>
|
||||
|
||||
<!-- Pack Option -->
|
||||
<Title>sherpa-onnx {{ dotnet_rid }} v{{ version }}</Title>
|
||||
<PackageId>org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }}</PackageId>
|
||||
|
||||
<!-- Signing -->
|
||||
<SignAssembly>false</SignAssembly>
|
||||
<PublicSign>false</PublicSign>
|
||||
<DelaySign>false</DelaySign>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Include="../README.md" Pack="true" PackagePath="/"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Native library must be in native directory... -->
|
||||
<!-- If project is built as a STATIC_LIBRARY (e.g. Windows) then we don't have to include it -->
|
||||
<Content Include="
|
||||
{{ libs }}
|
||||
">
|
||||
<PackagePath>runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension)</PackagePath>
|
||||
<Pack>true</Pack>
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR})
|
||||
add_library(sherpa-onnx-c-api c-api.cc)
|
||||
target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core)
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1)
|
||||
target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_MAIN_LIB=1)
|
||||
endif()
|
||||
|
||||
install(TARGETS sherpa-onnx-c-api DESTINATION lib)
|
||||
|
||||
install(FILES c-api.h
|
||||
|
||||
@@ -10,10 +10,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/display.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
|
||||
struct SherpaOnnxOnlineRecognizer {
|
||||
sherpa_onnx::OnlineRecognizer *impl;
|
||||
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
|
||||
};
|
||||
|
||||
struct SherpaOnnxOnlineStream {
|
||||
@@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
||||
recognizer_config.endpoint_config.rule3.min_utterance_length =
|
||||
config->rule3_min_utterance_length;
|
||||
|
||||
if (config->model_config.debug) {
|
||||
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
||||
}
|
||||
|
||||
SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
|
||||
recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config);
|
||||
|
||||
recognizer->impl =
|
||||
std::make_unique<sherpa_onnx::OnlineRecognizer>(recognizer_config);
|
||||
|
||||
return recognizer;
|
||||
}
|
||||
|
||||
void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) {
|
||||
delete recognizer->impl;
|
||||
delete recognizer;
|
||||
}
|
||||
|
||||
@@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; }
|
||||
void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) {
|
||||
display->impl->Print(idx, s);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// For offline ASR (i.e., non-streaming ASR)
|
||||
// ============================================================
|
||||
//
|
||||
struct SherpaOnnxOfflineRecognizer {
|
||||
std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl;
|
||||
};
|
||||
|
||||
struct SherpaOnnxOfflineStream {
|
||||
std::unique_ptr<sherpa_onnx::OfflineStream> impl;
|
||||
explicit SherpaOnnxOfflineStream(
|
||||
std::unique_ptr<sherpa_onnx::OfflineStream> p)
|
||||
: impl(std::move(p)) {}
|
||||
};
|
||||
|
||||
SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
|
||||
const SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
sherpa_onnx::OfflineRecognizerConfig recognizer_config;
|
||||
|
||||
recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
|
||||
|
||||
recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
|
||||
|
||||
recognizer_config.model_config.transducer.encoder_filename =
|
||||
config->model_config.transducer.encoder;
|
||||
|
||||
recognizer_config.model_config.transducer.decoder_filename =
|
||||
config->model_config.transducer.decoder;
|
||||
|
||||
recognizer_config.model_config.transducer.joiner_filename =
|
||||
config->model_config.transducer.joiner;
|
||||
|
||||
recognizer_config.model_config.paraformer.model =
|
||||
config->model_config.paraformer.model;
|
||||
|
||||
recognizer_config.model_config.nemo_ctc.model =
|
||||
config->model_config.nemo_ctc.model;
|
||||
|
||||
recognizer_config.model_config.tokens = config->model_config.tokens;
|
||||
recognizer_config.model_config.num_threads = config->model_config.num_threads;
|
||||
recognizer_config.model_config.debug = config->model_config.debug;
|
||||
|
||||
recognizer_config.lm_config.model = config->lm_config.model;
|
||||
recognizer_config.lm_config.scale = config->lm_config.scale;
|
||||
|
||||
recognizer_config.decoding_method = config->decoding_method;
|
||||
recognizer_config.max_active_paths = config->max_active_paths;
|
||||
|
||||
if (config->model_config.debug) {
|
||||
fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
|
||||
}
|
||||
|
||||
SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
|
||||
|
||||
recognizer->impl =
|
||||
std::make_unique<sherpa_onnx::OfflineRecognizer>(recognizer_config);
|
||||
|
||||
return recognizer;
|
||||
}
|
||||
|
||||
void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer *recognizer) {
|
||||
delete recognizer;
|
||||
}
|
||||
|
||||
SherpaOnnxOfflineStream *CreateOfflineStream(
|
||||
const SherpaOnnxOfflineRecognizer *recognizer) {
|
||||
SherpaOnnxOfflineStream *stream =
|
||||
new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
|
||||
return stream;
|
||||
}
|
||||
|
||||
void DestoryOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; }
|
||||
|
||||
void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate,
|
||||
const float *samples, int32_t n) {
|
||||
stream->impl->AcceptWaveform(sample_rate, samples, n);
|
||||
}
|
||||
|
||||
void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer,
|
||||
SherpaOnnxOfflineStream *stream) {
|
||||
recognizer->impl->DecodeStream(stream->impl.get());
|
||||
}
|
||||
|
||||
void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer *recognizer,
|
||||
SherpaOnnxOfflineStream **streams,
|
||||
int32_t n) {
|
||||
std::vector<sherpa_onnx::OfflineStream *> ss(n);
|
||||
for (int32_t i = 0; i != n; ++i) {
|
||||
ss[i] = streams[i]->impl.get();
|
||||
}
|
||||
recognizer->impl->DecodeStreams(ss.data(), n);
|
||||
}
|
||||
|
||||
SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
|
||||
SherpaOnnxOfflineStream *stream) {
|
||||
const sherpa_onnx::OfflineRecognitionResult &result =
|
||||
stream->impl->GetResult();
|
||||
const auto &text = result.text;
|
||||
|
||||
auto r = new SherpaOnnxOfflineRecognizerResult;
|
||||
r->text = new char[text.size() + 1];
|
||||
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
|
||||
const_cast<char *>(r->text)[text.size()] = 0;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void DestroyOfflineRecognizerResult(
|
||||
const SherpaOnnxOfflineRecognizerResult *r) {
|
||||
delete[] r->text;
|
||||
delete r;
|
||||
}
|
||||
|
||||
@@ -18,12 +18,35 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h
|
||||
// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in
|
||||
// CMakeLists.txt
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
|
||||
#define SHERPA_ONNX_EXPORT __declspec(dllexport)
|
||||
#define SHERPA_ONNX_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
#define SHERPA_ONNX_EXPORT
|
||||
#define SHERPA_ONNX_IMPORT
|
||||
#endif
|
||||
#else // WIN32
|
||||
#define SHERPA_ONNX_EXPORT __attribute__((__visibility__("default")))
|
||||
#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
|
||||
#endif
|
||||
|
||||
#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
|
||||
#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
|
||||
#else
|
||||
#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
|
||||
#endif
|
||||
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
typedef struct SherpaOnnxOnlineTransducerModelConfig {
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
|
||||
const char *encoder;
|
||||
const char *decoder;
|
||||
const char *joiner;
|
||||
@@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig {
|
||||
} SherpaOnnxOnlineTransducerModelConfig;
|
||||
|
||||
/// It expects 16 kHz 16-bit single channel wave format.
|
||||
typedef struct SherpaOnnxFeatureConfig {
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
||||
/// Sample rate of the input data. MUST match the one expected
|
||||
/// by the model. For instance, it should be 16000 for models provided
|
||||
/// by us.
|
||||
@@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig {
|
||||
int32_t feature_dim;
|
||||
} SherpaOnnxFeatureConfig;
|
||||
|
||||
typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||
SherpaOnnxFeatureConfig feat_config;
|
||||
SherpaOnnxOnlineTransducerModelConfig model_config;
|
||||
|
||||
@@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||
float rule3_min_utterance_length;
|
||||
} SherpaOnnxOnlineRecognizerConfig;
|
||||
|
||||
typedef struct SherpaOnnxOnlineRecognizerResult {
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
|
||||
const char *text;
|
||||
// TODO(fangjun): Add more fields
|
||||
} SherpaOnnxOnlineRecognizerResult;
|
||||
@@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult {
|
||||
/// Note: OnlineRecognizer here means StreamingRecognizer.
|
||||
/// It does not need to access the Internet during recognition.
|
||||
/// Everything is run locally.
|
||||
typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;
|
||||
typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizer
|
||||
SherpaOnnxOnlineRecognizer;
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
|
||||
|
||||
/// @param config Config for the recongizer.
|
||||
/// @param config Config for the recognizer.
|
||||
/// @return Return a pointer to the recognizer. The user has to invoke
|
||||
/// DestroyOnlineRecognizer() to free it to avoid memory leak.
|
||||
SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
||||
SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
||||
const SherpaOnnxOnlineRecognizerConfig *config);
|
||||
|
||||
/// Free a pointer returned by CreateOnlineRecognizer()
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer);
|
||||
SHERPA_ONNX_API void DestroyOnlineRecognizer(
|
||||
SherpaOnnxOnlineRecognizer *recognizer);
|
||||
|
||||
/// Create an online stream for accepting wave samples.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
/// @return Return a pointer to an OnlineStream. The user has to invoke
|
||||
/// DestoryOnlineStream() to free it to avoid memory leak.
|
||||
SherpaOnnxOnlineStream *CreateOnlineStream(
|
||||
SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStream(
|
||||
const SherpaOnnxOnlineRecognizer *recognizer);
|
||||
|
||||
/// Destory an online stream.
|
||||
/// Destroy an online stream.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// Accept input audio samples and compute the features.
|
||||
/// The user has to invoke DecodeOnlineStream() to run the neural network and
|
||||
@@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
|
||||
/// @param samples A pointer to a 1-D array containing audio samples.
|
||||
/// The range of samples has to be normalized to [-1, 1].
|
||||
/// @param n Number of elements in the samples array.
|
||||
void AcceptWaveform(SherpaOnnxOnlineStream *stream, int32_t sample_rate,
|
||||
const float *samples, int32_t n);
|
||||
SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
|
||||
int32_t sample_rate, const float *samples,
|
||||
int32_t n);
|
||||
|
||||
/// Return 1 if there are enough number of feature frames for decoding.
|
||||
/// Return 0 otherwise.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer
|
||||
/// @param stream A pointer returned by CreateOnlineStream
|
||||
int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API int32_t IsOnlineStreamReady(
|
||||
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// Call this function to run the neural network model and decoding.
|
||||
//
|
||||
@@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
/// DecodeOnlineStream(recognizer, stream);
|
||||
/// }
|
||||
///
|
||||
void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// This function is similar to DecodeOnlineStream(). It decodes multiple
|
||||
/// OnlineStream in parallel.
|
||||
@@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
/// @param streams A pointer array containing pointers returned by
|
||||
/// CreateOnlineStream()
|
||||
/// @param n Number of elements in the given streams array.
|
||||
void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream **streams, int32_t n);
|
||||
SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
|
||||
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams,
|
||||
int32_t n);
|
||||
|
||||
/// Get the decoding results so far for an OnlineStream.
|
||||
///
|
||||
@@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
/// @return A pointer containing the result. The user has to invoke
|
||||
/// DestroyOnlineRecognizerResult() to free the returned pointer to
|
||||
/// avoid memory leak.
|
||||
SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
SHERPA_ONNX_API SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// Destroy the pointer returned by GetOnlineStreamResult().
|
||||
///
|
||||
/// @param r A pointer returned by GetOnlineStreamResult()
|
||||
void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r);
|
||||
SHERPA_ONNX_API void DestroyOnlineRecognizerResult(
|
||||
const SherpaOnnxOnlineRecognizerResult *r);
|
||||
|
||||
/// Reset an OnlineStream, which clears the neural network model state
|
||||
/// and the state for decoding.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
|
||||
/// @param stream A pointer returned by CreateOnlineStream
|
||||
void Reset(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// Signal that no more audio samples would be available.
|
||||
/// After this call, you cannot call AcceptWaveform() any more.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
void InputFinished(SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream);
|
||||
|
||||
/// Return 1 if an endpoint has been detected.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
|
||||
int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
SherpaOnnxOnlineStream *stream);
|
||||
|
||||
// for displaying results on Linux/macOS.
|
||||
typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
|
||||
|
||||
/// Create a display object. Must be freed using DestroyDisplay to avoid
|
||||
/// memory leak.
|
||||
SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line);
|
||||
SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line);
|
||||
|
||||
void DestroyDisplay(SherpaOnnxDisplay *display);
|
||||
SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display);
|
||||
|
||||
/// Print the result.
|
||||
void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s);
|
||||
SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx,
|
||||
const char *s);
|
||||
// ============================================================
|
||||
// For offline ASR (i.e., non-streaming ASR)
|
||||
// ============================================================
|
||||
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, and joiner-xxx.onnx for this struct
|
||||
/// from there.
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTransducerModelConfig {
|
||||
const char *encoder;
|
||||
const char *decoder;
|
||||
const char *joiner;
|
||||
} SherpaOnnxOfflineTransducerModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineParaformerModelConfig {
|
||||
const char *model;
|
||||
} SherpaOnnxOfflineParaformerModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig {
|
||||
const char *model;
|
||||
} SherpaOnnxOfflineNemoEncDecCtcModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
|
||||
const char *model;
|
||||
float scale;
|
||||
} SherpaOnnxOfflineLMConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
|
||||
SherpaOnnxOfflineTransducerModelConfig transducer;
|
||||
SherpaOnnxOfflineParaformerModelConfig paraformer;
|
||||
SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
|
||||
|
||||
const char *tokens;
|
||||
int32_t num_threads;
|
||||
int32_t debug;
|
||||
} SherpaOnnxOfflineModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
|
||||
SherpaOnnxFeatureConfig feat_config;
|
||||
SherpaOnnxOfflineModelConfig model_config;
|
||||
SherpaOnnxOfflineLMConfig lm_config;
|
||||
|
||||
const char *decoding_method;
|
||||
int32_t max_active_paths;
|
||||
} SherpaOnnxOfflineRecognizerConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
|
||||
SherpaOnnxOfflineRecognizer;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
|
||||
|
||||
/// @param config Config for the recognizer.
|
||||
/// @return Return a pointer to the recognizer. The user has to invoke
|
||||
/// DestroyOfflineRecognizer() to free it to avoid memory leak.
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
|
||||
const SherpaOnnxOfflineRecognizerConfig *config);
|
||||
|
||||
/// Free a pointer returned by CreateOfflineRecognizer()
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOfflineRecognizer()
|
||||
SHERPA_ONNX_API void DestroyOfflineRecognizer(
|
||||
SherpaOnnxOfflineRecognizer *recognizer);
|
||||
|
||||
/// Create an offline stream for accepting wave samples.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOfflineRecognizer()
|
||||
/// @return Return a pointer to an OfflineStream. The user has to invoke
|
||||
/// DestoryOfflineStream() to free it to avoid memory leak.
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
|
||||
const SherpaOnnxOfflineRecognizer *recognizer);
|
||||
|
||||
/// Destroy an offline stream.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOfflineStream()
|
||||
SHERPA_ONNX_API void DestoryOfflineStream(SherpaOnnxOfflineStream *stream);
|
||||
|
||||
/// Accept input audio samples and compute the features.
|
||||
/// The user has to invoke DecodeOfflineStream() to run the neural network and
|
||||
/// decoding.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOfflineStream().
|
||||
/// @param sample_rate Sample rate of the input samples. If it is different
|
||||
/// from config.feat_config.sample_rate, we will do
|
||||
/// resampling inside sherpa-onnx.
|
||||
/// @param samples A pointer to a 1-D array containing audio samples.
|
||||
/// The range of samples has to be normalized to [-1, 1].
|
||||
/// @param n Number of elements in the samples array.
|
||||
///
|
||||
/// @caution: For each offline stream, please invoke this function only once!
|
||||
SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
|
||||
int32_t sample_rate,
|
||||
const float *samples, int32_t n);
|
||||
/// Decode an offline stream.
|
||||
///
|
||||
/// We assume you have invoked AcceptWaveformOffline() for the given stream
|
||||
/// before calling this function.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
|
||||
/// @param stream A pointer returned by CreateOfflineStream()
|
||||
SHERPA_ONNX_API void DecodeOfflineStream(
|
||||
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream);
|
||||
|
||||
/// Decode a list of offline streams in parallel.
|
||||
///
|
||||
/// We assume you have invoked AcceptWaveformOffline() for each stream
|
||||
/// before calling this function.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
|
||||
/// @param streams A pointer array containing pointers returned
|
||||
/// by CreateOfflineStream().
|
||||
/// @param n Number of entries in the given streams.
|
||||
SHERPA_ONNX_API void DecodeMultipleOfflineStreams(
|
||||
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams,
|
||||
int32_t n);
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {
|
||||
const char *text;
|
||||
// TODO(fangjun): Add more fields
|
||||
} SherpaOnnxOfflineRecognizerResult;
|
||||
|
||||
/// Get the result of the offline stream.
|
||||
///
|
||||
/// We assume you have called DecodeOfflineStream() or
|
||||
/// DecodeMultipleOfflineStreams() with the given stream before calling
|
||||
/// this function.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOfflineStream().
|
||||
/// @return Return a pointer to the result. The user has to invoke
|
||||
/// DestroyOfflineRecognizerResult() to free the returned pointer to
|
||||
/// avoid memory leak.
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
|
||||
SherpaOnnxOfflineStream *stream);
|
||||
|
||||
/// Destroy the pointer returned by GetOfflineStreamResult().
|
||||
///
|
||||
/// @param r A pointer returned by GetOfflineStreamResult()
|
||||
SHERPA_ONNX_API void DestroyOfflineRecognizerResult(
|
||||
const SherpaOnnxOfflineRecognizerResult *r);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
||||
@@ -1,872 +0,0 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
/// <summary>
|
||||
/// online recognizer package
|
||||
/// Copyright (c) 2023 by manyeyes
|
||||
/// </summary>
|
||||
public class OnlineBase : IDisposable
|
||||
{
|
||||
public void Dispose()
|
||||
{
|
||||
Dispose(disposing: true);
|
||||
GC.SuppressFinalize(this);
|
||||
}
|
||||
protected virtual void Dispose(bool disposing)
|
||||
{
|
||||
if (!disposing)
|
||||
{
|
||||
if (_onlineRecognizerResult != IntPtr.Zero)
|
||||
{
|
||||
SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
|
||||
_onlineRecognizerResult = IntPtr.Zero;
|
||||
}
|
||||
if (_onlineStream.impl != IntPtr.Zero)
|
||||
{
|
||||
SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
|
||||
_onlineStream.impl = IntPtr.Zero;
|
||||
}
|
||||
if (_onlineRecognizer.impl != IntPtr.Zero)
|
||||
{
|
||||
SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
|
||||
_onlineRecognizer.impl = IntPtr.Zero;
|
||||
}
|
||||
this._disposed = true;
|
||||
}
|
||||
}
|
||||
~OnlineBase()
|
||||
{
|
||||
Dispose(this._disposed);
|
||||
}
|
||||
internal SherpaOnnxOnlineStream _onlineStream;
|
||||
internal IntPtr _onlineRecognizerResult;
|
||||
internal SherpaOnnxOnlineRecognizer _onlineRecognizer;
|
||||
internal bool _disposed = false;
|
||||
}
|
||||
public class OnlineStream : OnlineBase
|
||||
{
|
||||
internal OnlineStream(SherpaOnnxOnlineStream onlineStream)
|
||||
{
|
||||
this._onlineStream = onlineStream;
|
||||
}
|
||||
protected override void Dispose(bool disposing)
|
||||
{
|
||||
if (!disposing)
|
||||
{
|
||||
SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
|
||||
_onlineStream.impl = IntPtr.Zero;
|
||||
this._disposed = true;
|
||||
base.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
public class OnlineRecognizerResult : OnlineBase
|
||||
{
|
||||
internal OnlineRecognizerResult(IntPtr onlineRecognizerResult)
|
||||
{
|
||||
this._onlineRecognizerResult = onlineRecognizerResult;
|
||||
}
|
||||
protected override void Dispose(bool disposing)
|
||||
{
|
||||
if (!disposing)
|
||||
{
|
||||
SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
|
||||
_onlineRecognizerResult = IntPtr.Zero;
|
||||
this._disposed = true;
|
||||
base.Dispose(disposing);
|
||||
}
|
||||
}
|
||||
}
|
||||
public class OnlineRecognizer<T> : OnlineBase
|
||||
where T : class, new()
|
||||
{
|
||||
|
||||
public OnlineRecognizer(T t,
|
||||
string tokensFilePath, string decoding_method = "greedy_search",
|
||||
int sample_rate = 16000, int feature_dim = 80,
|
||||
int num_threads = 2, bool debug = false, int max_active_paths = 4,
|
||||
int enable_endpoint=0,int rule1_min_trailing_silence=0,
|
||||
int rule2_min_trailing_silence=0,int rule3_min_utterance_length=0)
|
||||
{
|
||||
SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer();
|
||||
SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig();
|
||||
if (t is not null && t.GetType() == typeof(OnlineTransducer))
|
||||
{
|
||||
OnlineTransducer? onlineTransducer = t as OnlineTransducer;
|
||||
#pragma warning disable CS8602 // 解引用可能出现空引用。
|
||||
Trace.Assert(File.Exists(onlineTransducer.DecoderFilename)
|
||||
&& File.Exists(onlineTransducer.EncoderFilename)
|
||||
&& File.Exists(onlineTransducer.JoinerFilename), "Please provide a model");
|
||||
#pragma warning restore CS8602 // 解引用可能出现空引用。
|
||||
Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
|
||||
Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
|
||||
transducer.encoder_filename = onlineTransducer.EncoderFilename;
|
||||
transducer.decoder_filename = onlineTransducer.DecoderFilename;
|
||||
transducer.joiner_filename = onlineTransducer.JoinerFilename;
|
||||
}
|
||||
|
||||
model_config.transducer = transducer;
|
||||
model_config.num_threads = num_threads;
|
||||
model_config.debug = debug;
|
||||
model_config.tokens = tokensFilePath;
|
||||
|
||||
SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
|
||||
feat_config.sample_rate = sample_rate;
|
||||
feat_config.feature_dim = feature_dim;
|
||||
|
||||
SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig;
|
||||
sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method;
|
||||
sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config;
|
||||
sherpaOnnxOnlineRecognizerConfig.model_config = model_config;
|
||||
sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths;
|
||||
//endpoint
|
||||
sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint;
|
||||
sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence;
|
||||
sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence;
|
||||
sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length;
|
||||
|
||||
_onlineRecognizer =
|
||||
SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig);
|
||||
}
|
||||
internal OnlineStream CreateOnlineStream()
|
||||
{
|
||||
SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer);
|
||||
return new OnlineStream(stream);
|
||||
}
|
||||
public void InputFinished(OnlineStream stream)
|
||||
{
|
||||
SherpaOnnxSharp.InputFinished(stream._onlineStream);
|
||||
}
|
||||
public List<OnlineStream> CreateStreams(List<float[]> samplesList)
|
||||
{
|
||||
int batch_size = samplesList.Count;
|
||||
List<OnlineStream> streams = new List<OnlineStream>();
|
||||
for (int i = 0; i < batch_size; i++)
|
||||
{
|
||||
OnlineStream stream = CreateOnlineStream();
|
||||
AcceptWaveform(stream._onlineStream, 16000, samplesList[i]);
|
||||
InputFinished(stream);
|
||||
streams.Add(stream);
|
||||
}
|
||||
return streams;
|
||||
}
|
||||
public OnlineStream CreateStream()
|
||||
{
|
||||
OnlineStream stream = CreateOnlineStream();
|
||||
return stream;
|
||||
}
|
||||
internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples)
|
||||
{
|
||||
SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length);
|
||||
}
|
||||
public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples)
|
||||
{
|
||||
AcceptWaveform(stream._onlineStream, sample_rate, samples);
|
||||
}
|
||||
internal IntPtr GetStreamsIntPtr(OnlineStream[] streams)
|
||||
{
|
||||
int streams_len = streams.Length;
|
||||
int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream));
|
||||
IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len);
|
||||
unsafe
|
||||
{
|
||||
byte* ptrbds = (byte*)(streamsIntPtr.ToPointer());
|
||||
for (int i = 0; i < streams_len; i++, ptrbds += (size))
|
||||
{
|
||||
IntPtr streamIntptr = new IntPtr(ptrbds);
|
||||
Marshal.StructureToPtr(streams[i]._onlineStream, streamIntptr, false);
|
||||
}
|
||||
|
||||
}
|
||||
return streamsIntPtr;
|
||||
}
|
||||
internal bool IsReady(OnlineStream stream)
|
||||
{
|
||||
return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0;
|
||||
}
|
||||
public void DecodeMultipleStreams(List<OnlineStream> streams)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
List<OnlineStream> streamList = new List<OnlineStream>();
|
||||
foreach (OnlineStream stream in streams)
|
||||
{
|
||||
if (IsReady(stream))
|
||||
{
|
||||
streamList.Add(stream);
|
||||
}
|
||||
}
|
||||
if (streamList.Count == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
OnlineStream[] streamsBatch = new OnlineStream[streamList.Count];
|
||||
for (int i = 0; i < streamsBatch.Length; i++)
|
||||
{
|
||||
streamsBatch[i] = streamList[i];
|
||||
}
|
||||
streamList.Clear();
|
||||
IntPtr streamsIntPtr = GetStreamsIntPtr(streamsBatch);
|
||||
SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, streamsBatch.Length);
|
||||
Marshal.FreeHGlobal(streamsIntPtr);
|
||||
}
|
||||
}
|
||||
public void DecodeStream(OnlineStream stream)
|
||||
{
|
||||
while (IsReady(stream))
|
||||
{
|
||||
SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream);
|
||||
}
|
||||
}
|
||||
/// <summary>
/// Fetches the current recognition result for a native stream handle and copies it
/// into a managed <see cref="OnlineRecognizerResultEntity"/>.
/// </summary>
/// <remarks>
/// The native result is released in this method, right after it has been copied.
/// Handing the pointer to a finalizable wrapper is unsafe here: the wrapper is
/// unreachable after its last read, so its finalizer may free the text buffer
/// while it is still being decoded.
/// </remarks>
/// <param name="stream">Native stream handle.</param>
/// <returns>
/// The decoded text and its length. An entity with default values is returned
/// when the native call yields a null pointer.
/// </returns>
internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream)
{
    OnlineRecognizerResultEntity entity = new OnlineRecognizerResultEntity();

    IntPtr resultPtr = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream);
    if (resultPtr == IntPtr.Zero)
    {
        return entity;
    }

    try
    {
        SherpaOnnxOnlineRecognizerResult native =
            Marshal.PtrToStructure<SherpaOnnxOnlineRecognizerResult>(resultPtr);

        // Decode as UTF-8 explicitly: PtrToStringAnsi uses the system ANSI code
        // page on Windows, which corrupts non-ASCII text.
        // NOTE(review): assumes the native side produces UTF-8 - confirm.
        entity.text = Marshal.PtrToStringUTF8(native.text);
        entity.text_len = native.text_len;
    }
    finally
    {
        SherpaOnnxSharp.DestroyOnlineRecognizerResult(resultPtr);
    }

    return entity;
}
|
||||
/// <summary>
/// Returns the current recognition result of <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream wrapper whose native handle is queried.</param>
/// <returns>The result produced by the handle-based overload.</returns>
public OnlineRecognizerResultEntity GetResult(OnlineStream stream)
    => GetResult(stream._onlineStream);
|
||||
/// <summary>
/// Collects the current recognition result of every stream, in input order.
/// </summary>
/// <param name="streams">Streams to query.</param>
/// <returns>One result per input stream.</returns>
public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams)
{
    var collected = new List<OnlineRecognizerResultEntity>();
    foreach (OnlineStream current in streams)
    {
        collected.Add(GetResult(current._onlineStream));
    }

    return collected;
}
|
||||
/// <summary>
/// Releases the native recognizer handle.
/// </summary>
/// <remarks>
/// The handle is released on both the explicit <c>Dispose()</c> path and the finalizer path.
/// Releasing only when <paramref name="disposing"/> is false made an explicit
/// <c>Dispose()</c> do nothing while still suppressing the finalizer, so the handle leaked.
/// </remarks>
/// <param name="disposing">
/// True when called from <c>Dispose()</c>, false when called from the finalizer.
/// </param>
protected override void Dispose(bool disposing)
{
    if (_disposed)
    {
        return;
    }

    if (_onlineRecognizer.impl != IntPtr.Zero)
    {
        SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
        // Cleared before the base class runs so the handle cannot be destroyed twice.
        _onlineRecognizer.impl = IntPtr.Zero;
    }

    // NOTE(review): the base class is outside this view; it is assumed to
    // tolerate handles that are already zero - confirm.
    base.Dispose(disposing);
    _disposed = true;
}
|
||||
}
|
||||
/// <summary>
/// Base class for the offline wrappers. It holds the native handles (stream, result,
/// recognizer) and releases whichever of them is non-zero.
/// </summary>
public class OfflineBase : IDisposable
{
    /// <summary>
    /// Releases the native handles now and suppresses finalization.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases every non-zero native handle exactly once.
    /// </summary>
    /// <remarks>
    /// Only unmanaged handles are held, so the cleanup is the same for the explicit
    /// and the finalizer path. Doing it only when <paramref name="disposing"/> is false
    /// meant that an explicit <c>Dispose()</c> freed nothing and then suppressed the finalizer.
    /// </remarks>
    /// <param name="disposing">
    /// True when called from <see cref="Dispose()"/>, false when called from the finalizer.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }

        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }

        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }

        _disposed = true;
    }

    /// <summary>
    /// Finalizer: releases the handles if nothing has released them yet.
    /// </summary>
    ~OfflineBase()
    {
        if (!_disposed)
        {
            Dispose(disposing: false);
        }
    }

    /// <summary>Native stream handle; <c>impl</c> is zero when not owned.</summary>
    internal SherpaOnnxOfflineStream _offlineStream;

    /// <summary>Native result pointer; zero when not owned.</summary>
    internal IntPtr _offlineRecognizerResult;

    /// <summary>Native recognizer handle; <c>impl</c> is zero when not owned.</summary>
    internal SherpaOnnxOfflineRecognizer _offlineRecognizer;

    /// <summary>Set once the handles have been released.</summary>
    internal bool _disposed = false;
}
|
||||
/// <summary>
/// Managed owner of one native offline stream handle.
/// </summary>
public class OfflineStream : OfflineBase
{
    /// <summary>
    /// Wraps an existing native stream handle.
    /// </summary>
    /// <param name="offlineStream">Handle to take ownership of.</param>
    internal OfflineStream(SherpaOnnxOfflineStream offlineStream)
    {
        this._offlineStream = offlineStream;
    }

    /// <summary>
    /// Destroys the native stream.
    /// </summary>
    /// <remarks>
    /// Runs for both explicit disposal and finalization. Previously an explicit
    /// <c>Dispose()</c> released nothing and suppressed the finalizer, leaking the stream.
    /// </remarks>
    /// <param name="disposing">
    /// True when called from <c>Dispose()</c>, false when called from the finalizer.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            // Cleared before the base class runs so it cannot destroy the stream again.
            _offlineStream.impl = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||
/// <summary>
/// Managed owner of one native offline recognition result pointer.
/// </summary>
public class OfflineRecognizerResult : OfflineBase
{
    /// <summary>
    /// Wraps an existing native result pointer.
    /// </summary>
    /// <param name="offlineRecognizerResult">Pointer to take ownership of.</param>
    internal OfflineRecognizerResult(IntPtr offlineRecognizerResult)
    {
        this._offlineRecognizerResult = offlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result.
    /// </summary>
    /// <remarks>
    /// Runs for both explicit disposal and finalization. Previously an explicit
    /// <c>Dispose()</c> released nothing and suppressed the finalizer, leaking the result.
    /// </remarks>
    /// <param name="disposing">
    /// True when called from <c>Dispose()</c>, false when called from the finalizer.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            // Cleared before the base class runs so it cannot destroy the result again.
            _offlineRecognizerResult = IntPtr.Zero;
        }

        base.Dispose(disposing);
        _disposed = true;
    }
}
|
||||
public class OfflineRecognizer<T> : OfflineBase
|
||||
where T : class, new()
|
||||
{
|
||||
/// <summary>
/// Creates a native offline recognizer for the model described by <paramref name="t"/>.
/// </summary>
/// <param name="t">
/// Model description: an <see cref="OfflineTransducer"/>, <see cref="OfflineParaformer"/>
/// or <see cref="OfflineNemoEncDecCtc"/> (or a type derived from one of them).
/// </param>
/// <param name="tokensFilePath">Path of the tokens file; asserted to exist.</param>
/// <param name="decoding_method">Decoding method name passed to the native configuration.</param>
/// <param name="sample_rate">Sample rate stored in the feature configuration.</param>
/// <param name="feature_dim">Feature dimension stored in the feature configuration.</param>
/// <param name="num_threads">Thread count passed to the model configuration; asserted to be positive.</param>
/// <param name="debug">Debug flag passed to the model configuration.</param>
/// <exception cref="ArgumentException">
/// <paramref name="t"/> is null or not one of the supported model types. Previously this
/// case silently produced a recognizer with no model configured.
/// </exception>
public OfflineRecognizer(T t,
    string tokensFilePath, string decoding_method = "greedy_search",
    int sample_rate = 16000, int feature_dim = 80,
    int num_threads = 2, bool debug = false)
{
    SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer();
    SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer();
    SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc();

    switch (t)
    {
        case OfflineTransducer offlineTransducer:
            Trace.Assert(File.Exists(offlineTransducer.DecoderFilename)
                && File.Exists(offlineTransducer.EncoderFilename)
                && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model");
            transducer.encoder_filename = offlineTransducer.EncoderFilename;
            transducer.decoder_filename = offlineTransducer.DecoderFilename;
            transducer.joiner_filename = offlineTransducer.JoinerFilename;
            break;

        case OfflineParaformer offlineParaformer:
            Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model");
            paraformer.model = offlineParaformer.Model;
            break;

        case OfflineNemoEncDecCtc offlineNemoEncDecCtc:
            Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model");
            nemo_ctc.model = offlineNemoEncDecCtc.Model;
            break;

        default:
            // No native handle exists yet; keep the finalizer from running
            // cleanup on this half-constructed instance.
            GC.SuppressFinalize(this);
            throw new ArgumentException(
                $"Unsupported model type: {t?.GetType().FullName ?? "null"}", nameof(t));
    }

    // Checks shared by every model type; they run after the model-file check, as before.
    Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
    Trace.Assert(num_threads > 0, "num_threads must be greater than 0");

    SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig();
    model_config.transducer = transducer;
    model_config.paraformer = paraformer;
    model_config.nemo_ctc = nemo_ctc;
    model_config.num_threads = num_threads;
    model_config.debug = debug;
    model_config.tokens = tokensFilePath;

    SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
    feat_config.sample_rate = sample_rate;
    feat_config.feature_dim = feature_dim;

    SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig;
    sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method;
    sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config;
    sherpaOnnxOfflineRecognizerConfig.model_config = model_config;

    _offlineRecognizer =
        SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig);
}
|
||||
/// <summary>
/// Creates one native offline stream for this recognizer and wraps it.
/// </summary>
/// <returns>A new <see cref="OfflineStream"/> that owns the native handle.</returns>
internal OfflineStream CreateOfflineStream()
    => new OfflineStream(SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer));
|
||||
/// <summary>
/// Creates one offline stream per sample buffer and feeds each buffer into its stream.
/// </summary>
/// <param name="samplesList">Audio buffers, one per stream to create.</param>
/// <param name="sample_rate">
/// Sample rate of the buffers in Hz. Defaults to 16000, the value that was
/// previously hard-coded.
/// </param>
/// <returns>The created streams, in the same order as <paramref name="samplesList"/>.</returns>
public OfflineStream[] CreateOfflineStream(List<float[]> samplesList, int sample_rate = 16000)
{
    int batch_size = samplesList.Count;
    OfflineStream[] streams = new OfflineStream[batch_size];
    for (int i = 0; i < batch_size; i++)
    {
        OfflineStream stream = CreateOfflineStream();
        AcceptWaveform(stream._offlineStream, sample_rate, samplesList[i]);
        streams[i] = stream;
    }

    return streams;
}
|
||||
/// <summary>
/// Passes a buffer of samples to the native stream.
/// </summary>
/// <param name="stream">Native stream handle.</param>
/// <param name="sample_rate">Sample rate forwarded to the native call.</param>
/// <param name="samples">Samples to submit; the whole array is passed.</param>
internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples)
    => SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length);
|
||||
/// <summary>
/// Copies the native handles of <paramref name="streams"/> into one contiguous
/// block allocated with <c>Marshal.AllocHGlobal</c>.
/// </summary>
/// <param name="streams">Streams whose handles are written, in order.</param>
/// <returns>
/// Pointer to the block. The caller must release it with <c>Marshal.FreeHGlobal</c>.
/// </returns>
internal IntPtr GetStreamsIntPtr(OfflineStream[] streams)
{
    int count = streams.Length;
    int elementSize = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream));
    IntPtr block = Marshal.AllocHGlobal(elementSize * count);

    // Walk the block one element at a time, writing each handle struct in place.
    IntPtr cursor = block;
    for (int index = 0; index < count; index++)
    {
        Marshal.StructureToPtr(streams[index]._offlineStream, cursor, false);
        cursor = IntPtr.Add(cursor, elementSize);
    }

    return block;
}
|
||||
/// <summary>
/// Decodes all given streams with one native batch call.
/// </summary>
/// <param name="streams">Streams to decode.</param>
public void DecodeMultipleOfflineStreams(OfflineStream[] streams)
{
    IntPtr streamsIntPtr = GetStreamsIntPtr(streams);
    try
    {
        SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length);
    }
    finally
    {
        // Release the unmanaged handle array even when the native call throws.
        Marshal.FreeHGlobal(streamsIntPtr);
    }
}
|
||||
/// <summary>
/// Fetches the recognition result of a native stream handle and copies it into a
/// managed <see cref="OfflineRecognizerResultEntity"/>.
/// </summary>
/// <remarks>
/// The native result is released in this method, right after it has been copied.
/// Handing the pointer to a finalizable wrapper is unsafe here: the wrapper is
/// unreachable after its last read, so its finalizer may free the text buffer
/// while it is still being decoded.
/// </remarks>
/// <param name="stream">Native stream handle.</param>
/// <returns>
/// The decoded text and its length. An entity with default values is returned
/// when the native call yields a null pointer.
/// </returns>
internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream)
{
    OfflineRecognizerResultEntity entity = new OfflineRecognizerResultEntity();

    IntPtr resultPtr = SherpaOnnxSharp.GetOfflineStreamResult(stream);
    if (resultPtr == IntPtr.Zero)
    {
        return entity;
    }

    try
    {
        SherpaOnnxOfflineRecognizerResult native =
            Marshal.PtrToStructure<SherpaOnnxOfflineRecognizerResult>(resultPtr);

        // Decode as UTF-8 explicitly: PtrToStringAnsi uses the system ANSI code
        // page on Windows, which corrupts non-ASCII text.
        // NOTE(review): assumes the native side produces UTF-8 - confirm.
        entity.text = Marshal.PtrToStringUTF8(native.text);
        entity.text_len = native.text_len;
    }
    finally
    {
        SherpaOnnxSharp.DestroyOfflineRecognizerResult(resultPtr);
    }

    return entity;
}
|
||||
/// <summary>
/// Collects the recognition result of every stream, in input order.
/// </summary>
/// <param name="streams">Streams to query.</param>
/// <returns>One result per input stream.</returns>
public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams)
{
    var collected = new List<OfflineRecognizerResultEntity>();
    foreach (OfflineStream current in streams)
    {
        collected.Add(GetResult(current._offlineStream));
    }

    return collected;
}
|
||||
/// <summary>
/// Destroys the native recognizer.
/// </summary>
/// <remarks>
/// Runs for both explicit disposal and finalization. Previously an explicit
/// <c>Dispose()</c> released nothing and suppressed the finalizer, leaking the recognizer.
/// </remarks>
/// <param name="disposing">
/// True when called from <c>Dispose()</c>, false when called from the finalizer.
/// </param>
protected override void Dispose(bool disposing)
{
    if (_disposed)
    {
        return;
    }

    if (_offlineRecognizer.impl != IntPtr.Zero)
    {
        SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
        // Cleared before the base class runs so it cannot destroy the recognizer again.
        _offlineRecognizer.impl = IntPtr.Zero;
    }

    base.Dispose(disposing);
    _disposed = true;
}
|
||||
}
|
||||
/// <summary>
/// P/Invoke declarations for the native <c>SherpaOnnxSharp</c> library.
/// Every entry point is declared with the cdecl calling convention.
/// </summary>
/// <remarks>
/// NOTE(review): the native <c>CreateOfflineRecognizer</c> definition takes its
/// configuration by pointer and is declared <c>__stdcall</c>, while the declaration here
/// passes the struct by value and uses cdecl - confirm that the two sides agree on
/// every target platform.
/// </remarks>
internal static partial class SherpaOnnxSharp
{
    private const string dllName = @"SherpaOnnxSharp";

    // ---------------------------------------------------------------- offline

    /// <summary>Creates a native offline recognizer from <paramref name="config"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, EntryPoint = "CreateOfflineRecognizer")]
    internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config);

    /// <summary>Creates a native offline stream for <paramref name="offlineRecognizer"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, EntryPoint = "CreateOfflineStream")]
    internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer);

    /// <summary>Feeds <paramref name="samples_size"/> samples into an offline stream.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, EntryPoint = "AcceptWaveform")]
    internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size);

    /// <summary>Decodes a single offline stream.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, EntryPoint = "DecodeOfflineStream")]
    internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream);

    /// <summary>
    /// Decodes <paramref name="n"/> offline streams; <paramref name="streams"/> points to
    /// an array of stream handles.
    /// </summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, EntryPoint = "DecodeMultipleOfflineStreams")]
    internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr streams, int n);

    /// <summary>
    /// Returns a pointer to the result of an offline stream. Free it with
    /// <see cref="DestroyOfflineRecognizerResult"/>.
    /// </summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "GetOfflineStreamResult")]
    internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream);

    /// <summary>Frees a pointer returned by <see cref="GetOfflineStreamResult"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOfflineRecognizerResult")]
    internal static extern void DestroyOfflineRecognizerResult(IntPtr result);

    /// <summary>Destroys a handle returned by <see cref="CreateOfflineStream"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOfflineStream")]
    internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream);

    /// <summary>Destroys a handle returned by <see cref="CreateOfflineRecognizer"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOfflineRecognizer")]
    internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer);

    // ----------------------------------------------------------------- online

    /// <summary>Creates a native online recognizer from <paramref name="config"/>.</summary>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "CreateOnlineRecognizer")]
    internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config);

    /// <summary>Frees a handle returned by <see cref="CreateOnlineRecognizer"/>.</summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOnlineRecognizer")]
    internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer);

    /// <summary>Creates an online stream for accepting wave samples.</summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <returns>
    /// A handle to an online stream. Call <see cref="DestroyOnlineStream"/> to free it
    /// and avoid a memory leak.
    /// </returns>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "CreateOnlineStream")]
    internal static extern SherpaOnnxOnlineStream CreateOnlineStream(SherpaOnnxOnlineRecognizer recognizer);

    /// <summary>Destroys an online stream.</summary>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOnlineStream")]
    internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Accepts input audio samples and computes the features. Call
    /// <see cref="DecodeOnlineStream"/> afterwards to run the neural network and decoding.
    /// </summary>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    /// <param name="sample_rate">
    /// Sample rate of the input samples. If it differs from
    /// <c>config.feat_config.sample_rate</c>, resampling is done inside sherpa-onnx.
    /// </param>
    /// <param name="samples">
    /// 1-D array of audio samples. The sample values must be normalized to [-1, 1].
    /// </param>
    /// <param name="n">Number of elements in <paramref name="samples"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "AcceptOnlineWaveform")]
    internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples, int n);

    /// <summary>
    /// Returns 1 if there are enough feature frames for decoding, 0 otherwise.
    /// </summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "IsOnlineStreamReady")]
    internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Runs the neural network model and decoding.
    /// Precondition: <see cref="IsOnlineStreamReady"/> MUST return 1.
    /// </summary>
    /// <example>
    /// <code>
    /// while (IsOnlineStreamReady(recognizer, stream)) {
    ///   DecodeOnlineStream(recognizer, stream);
    /// }
    /// </code>
    /// </example>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DecodeOnlineStream")]
    internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Like <see cref="DecodeOnlineStream"/>, but decodes multiple online streams in parallel.
    /// The caller must ensure that every stream is ready, i.e. that
    /// <see cref="IsOnlineStreamReady"/> returns 1 for it.
    /// </summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <param name="streams">Pointer to an array of online stream handles.</param>
    /// <param name="n">Number of elements in the streams array.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DecodeMultipleOnlineStreams")]
    internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer, IntPtr streams, int n);

    /// <summary>Gets the decoding results so far for an online stream.</summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    /// <returns>
    /// A pointer to the result. Call <see cref="DestroyOnlineRecognizerResult"/> to free it
    /// and avoid a memory leak.
    /// </returns>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "GetOnlineStreamResult")]
    internal static extern IntPtr GetOnlineStreamResult(SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>Destroys the pointer returned by <see cref="GetOnlineStreamResult"/>.</summary>
    /// <param name="result">A pointer returned by <see cref="GetOnlineStreamResult"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "DestroyOnlineRecognizerResult")]
    internal static extern void DestroyOnlineRecognizerResult(IntPtr result);

    /// <summary>
    /// Resets an online stream, which clears the neural network model state and the
    /// state for decoding.
    /// </summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "Reset")]
    internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Signals that no more audio samples will be available. After this call the
    /// stream must not be given any more samples.
    /// </summary>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "InputFinished")]
    internal static extern void InputFinished(SherpaOnnxOnlineStream stream);

    /// <summary>Reports whether an endpoint has been detected.</summary>
    /// <param name="recognizer">A handle returned by <see cref="CreateOnlineRecognizer"/>.</param>
    /// <param name="stream">A handle returned by <see cref="CreateOnlineStream"/>.</param>
    /// <returns>1 if an endpoint is detected, 0 otherwise.</returns>
    [DllImport(dllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "IsEndpoint")]
    internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);
}
|
||||
/// <summary>
/// Interop struct holding the three model file names of an offline transducer.
/// </summary>
internal struct SherpaOnnxOfflineTransducer
{
    /// <summary>Encoder model file name.</summary>
    public string encoder_filename;

    /// <summary>Decoder model file name.</summary>
    public string decoder_filename;

    /// <summary>Joiner model file name.</summary>
    public string joiner_filename;

    /// <summary>Initializes every file name to the empty string.</summary>
    public SherpaOnnxOfflineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = "";
    }
}
|
||||
/// <summary>
/// Interop struct holding the model file name of an offline paraformer.
/// </summary>
internal struct SherpaOnnxOfflineParaformer
{
    /// <summary>Model file name.</summary>
    public string model;

    /// <summary>Initializes the file name to the empty string.</summary>
    public SherpaOnnxOfflineParaformer() => model = "";
}
|
||||
/// <summary>
/// Interop struct holding the model file name of an offline NeMo EncDecCTC model.
/// </summary>
internal struct SherpaOnnxOfflineNemoEncDecCtc
{
    /// <summary>Model file name.</summary>
    public string model;

    /// <summary>Initializes the file name to the empty string.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc() => model = "";
}
|
||||
/// <summary>
/// Interop struct describing the offline model. It carries one entry per supported
/// model family plus the settings shared by all of them.
/// </summary>
internal struct SherpaOnnxOfflineModelConfig
{
    /// <summary>Transducer model files.</summary>
    public SherpaOnnxOfflineTransducer transducer;

    /// <summary>Paraformer model file.</summary>
    public SherpaOnnxOfflineParaformer paraformer;

    /// <summary>NeMo EncDecCTC model file.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;

    /// <summary>Path of the tokens file.</summary>
    public string tokens;

    /// <summary>Thread count passed to the native side.</summary>
    public int num_threads;

    /// <summary>Debug flag passed to the native side.</summary>
    public bool debug;
}
|
||||
/// <summary>
/// Feature extraction settings. It expects 16 kHz 16-bit single channel wave format.
/// </summary>
internal struct SherpaOnnxFeatureConfig
{
    /// <summary>
    /// Sample rate of the input data. MUST match the one expected by the model.
    /// For instance, it should be 16000 for models provided by us.
    /// </summary>
    public int sample_rate;

    /// <summary>
    /// Feature dimension of the model.
    /// For instance, it should be 80 for models provided by us.
    /// </summary>
    public int feature_dim;
}
|
||||
/// <summary>
/// Interop struct passed to the native <c>CreateOfflineRecognizer</c> call.
/// </summary>
internal struct SherpaOnnxOfflineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model files and model-level settings.</summary>
    public SherpaOnnxOfflineModelConfig model_config;

    /// <summary>Possible values are: greedy_search, modified_beam_search.</summary>
    public string decoding_method;
}
|
||||
/// <summary>
/// Handle to a native offline recognizer. <c>impl</c> is zero when no recognizer is held.
/// </summary>
internal struct SherpaOnnxOfflineRecognizer
{
    /// <summary>Native pointer.</summary>
    public IntPtr impl;
}
|
||||
/// <summary>
/// Handle to a native offline stream. <c>impl</c> is zero when no stream is held.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineStream
{
    /// <summary>Native pointer.</summary>
    public IntPtr impl;
}
|
||||
/// <summary>
/// Managed view of the native offline recognition result.
/// </summary>
internal struct SherpaOnnxOfflineRecognizerResult
{
    /// <summary>Pointer to the native text buffer.</summary>
    public IntPtr text;

    /// <summary>Length reported by the native side for <see cref="text"/>.</summary>
    public int text_len;
}
|
||||
/// <summary>
/// Interop struct holding the three model file names of an online transducer.
/// </summary>
internal struct SherpaOnnxOnlineTransducer
{
    /// <summary>Encoder model file name.</summary>
    public string encoder_filename;

    /// <summary>Decoder model file name.</summary>
    public string decoder_filename;

    /// <summary>Joiner model file name.</summary>
    public string joiner_filename;

    /// <summary>Initializes every file name to the empty string.</summary>
    public SherpaOnnxOnlineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = string.Empty;
    }
}
|
||||
/// <summary>
/// Interop struct describing the online model: transducer files plus shared settings.
/// </summary>
internal struct SherpaOnnxOnlineModelConfig
{
    /// <summary>Transducer model files.</summary>
    public SherpaOnnxOnlineTransducer transducer;

    /// <summary>Path of the tokens file.</summary>
    public string tokens;

    /// <summary>Thread count passed to the native side.</summary>
    public int num_threads;

    /// <summary>True to print debug information of the model.</summary>
    public bool debug;
}
|
||||
/// <summary>
/// Interop struct passed to the native <c>CreateOnlineRecognizer</c> call.
/// </summary>
internal struct SherpaOnnxOnlineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model files and model-level settings.</summary>
    public SherpaOnnxOnlineModelConfig model_config;

    /// <summary>Possible values are: greedy_search, modified_beam_search.</summary>
    public string decoding_method;

    /// <summary>
    /// Used only when decoding_method is modified_beam_search. Example value: 4.
    /// </summary>
    public int max_active_paths;

    /// <summary>
    /// 0 to disable endpoint detection. A non-zero value to enable endpoint detection.
    /// </summary>
    public int enable_endpoint;

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than this
    /// value even if nothing has been decoded.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule1_min_trailing_silence;

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than this
    /// value after something that is not blank has been decoded.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule2_min_trailing_silence;

    /// <summary>
    /// An endpoint is detected if the utterance in seconds is larger than this value.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule3_min_utterance_length;
}
|
||||
/// <summary>
/// Managed view of the native online recognition result.
/// </summary>
internal struct SherpaOnnxOnlineRecognizerResult
{
    /// <summary>Pointer to the native text buffer.</summary>
    public IntPtr text;

    /// <summary>Length reported by the native side for <see cref="text"/>.</summary>
    public int text_len;

    // TODO: Add more fields
}
|
||||
/// <summary>
/// Handle to a native online recognizer. <c>impl</c> is zero when no recognizer is held.
/// </summary>
internal struct SherpaOnnxOnlineRecognizer
{
    /// <summary>Native pointer.</summary>
    public IntPtr impl;
}
|
||||
/// <summary>
/// Handle to a native online stream. <c>impl</c> is zero when no stream is held.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineStream
{
    /// <summary>Native pointer.</summary>
    public IntPtr impl;
}
|
||||
/// <summary>
/// Describes an offline NeMo EncDecCTC model by its model file.
/// </summary>
public class OfflineNemoEncDecCtc
{
    /// <summary>Path of the model file; empty by default.</summary>
    public string Model { get; set; } = string.Empty;
}
|
||||
/// <summary>
/// Describes an offline paraformer model by its model file.
/// </summary>
public class OfflineParaformer
{
    /// <summary>Path of the model file; empty by default.</summary>
    public string Model { get; set; } = string.Empty;
}
|
||||
/// <summary>
/// Managed copy of an offline recognition result.
/// </summary>
public class OfflineRecognizerResultEntity
{
    /// <summary>Recognized text.</summary>
    public string? text { get; set; }

    /// <summary>Length of the recognized text as reported with the result.</summary>
    public int text_len { get; set; }

    /// <summary>Decoded tokens.</summary>
    public List<string>? tokens { get; set; }

    /// <summary>Timestamps.</summary>
    public List<float>? timestamps { get; set; }
}
|
||||
/// <summary>
/// Describes an offline transducer model by its encoder, decoder and joiner files.
/// </summary>
public class OfflineTransducer
{
    /// <summary>Path of the encoder model file; empty by default.</summary>
    public string EncoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the decoder model file; empty by default.</summary>
    public string DecoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the joiner model file; empty by default.</summary>
    public string JoinerFilename { get; set; } = string.Empty;
}
|
||||
/// <summary>
/// Endpoint detection settings for online recognition.
/// </summary>
public class OnlineEndpoint
{
    /// <summary>
    /// 0 to disable endpoint detection. A non-zero value to enable endpoint detection.
    /// </summary>
    public int EnableEndpoint { get; set; }

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than this
    /// value even if nothing has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule1MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than this
    /// value after something that is not blank has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule2MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if the utterance in seconds is larger than this value.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule3MinUtteranceLength { get; set; }
}
|
||||
/// <summary>
/// Managed copy of an online recognition result.
/// </summary>
public class OnlineRecognizerResultEntity
{
    /// <summary>Recognized text.</summary>
    public string? text { get; set; }

    /// <summary>Length of the recognized text as reported with the result.</summary>
    public int text_len { get; set; }

    /// <summary>Decoded tokens.</summary>
    public List<string>? tokens { get; set; }

    /// <summary>Timestamps.</summary>
    public List<float>? timestamps { get; set; }
}
|
||||
/// <summary>
/// Describes an online transducer model by its encoder, decoder and joiner files.
/// </summary>
public class OnlineTransducer
{
    /// <summary>Path of the encoder model file; empty by default.</summary>
    public string EncoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the decoder model file; empty by default.</summary>
    public string DecoderFilename { get; set; } = string.Empty;

    /// <summary>Path of the joiner model file; empty by default.</summary>
    public string JoinerFilename { get; set; } = string.Empty;
}
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Build against .NET 6. -->
    <TargetFramework>net6.0</TargetFramework>
    <!-- Bring the default set of namespaces into scope without explicit usings. -->
    <ImplicitUsings>enable</ImplicitUsings>
    <!-- Turn on nullable reference type analysis. -->
    <Nullable>enable</Nullable>
    <!-- Allow unsafe blocks in this project. -->
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

</Project>
|
||||
@@ -1,136 +0,0 @@
|
||||
// sherpa-onnx/sharp-api/offline-api.cpp
|
||||
//
|
||||
// Copyright (c) 2023 Manyeyes Corporation
|
||||
|
||||
#include "offline-api.h"
|
||||
|
||||
#include "sherpa-onnx/csrc/display.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
|
||||
namespace sherpa_onnx
|
||||
{
|
||||
struct SherpaOnnxOfflineRecognizer {
|
||||
sherpa_onnx::OfflineRecognizer* impl;
|
||||
};
|
||||
|
||||
struct SherpaOnnxOfflineStream {
|
||||
std::unique_ptr<sherpa_onnx::OfflineStream> impl;
|
||||
explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p)
|
||||
: impl(std::move(p)) {}
|
||||
};
|
||||
|
||||
struct SherpaOnnxDisplay {
|
||||
std::unique_ptr<sherpa_onnx::Display> impl;
|
||||
};
|
||||
|
||||
/// Create an offline recognizer from a C-style configuration.
///
/// Exactly one model family is configured: the first of transducer,
/// paraformer and nemo_ctc whose (first) file name is a non-empty string.
///
/// @param config Configuration to copy from. String members may be null; a
///               null string is treated like an empty one instead of being
///               passed to strlen() or std::string.
/// @return A newly allocated recognizer. Free it with
///         DestroyOfflineRecognizer().
SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
    const SherpaOnnxOfflineRecognizerConfig* config) {
  auto non_empty = [](const char* s) { return s != nullptr && s[0] != '\0'; };
  auto or_empty = [](const char* s) { return s != nullptr ? s : ""; };

  sherpa_onnx::OfflineRecognizerConfig recognizer_config;

  recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
  recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;

  const auto& in = config->model_config;
  auto& out = recognizer_config.model_config;

  if (non_empty(in.transducer.encoder_filename)) {
    out.transducer.encoder_filename = in.transducer.encoder_filename;
    out.transducer.decoder_filename = or_empty(in.transducer.decoder_filename);
    out.transducer.joiner_filename = or_empty(in.transducer.joiner_filename);
  } else if (non_empty(in.paraformer.model)) {
    out.paraformer.model = in.paraformer.model;
  } else if (non_empty(in.nemo_ctc.model)) {
    out.nemo_ctc.model = in.nemo_ctc.model;
  }

  out.tokens = or_empty(in.tokens);
  out.num_threads = in.num_threads;
  out.debug = in.debug;

  recognizer_config.decoding_method = or_empty(config->decoding_method);

  // Build the implementation first and hold it in a smart pointer, so nothing
  // is leaked if either allocation or the constructor throws.
  std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl(
      new sherpa_onnx::OfflineRecognizer(recognizer_config));

  SherpaOnnxOfflineRecognizer* recognizer = new SherpaOnnxOfflineRecognizer;
  recognizer->impl = impl.release();

  return recognizer;
}
|
||||
|
||||
/// Create an offline stream for the given recognizer.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @return A newly allocated stream handle. Free it with
///         DestroyOfflineStream().
SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
    SherpaOnnxOfflineRecognizer* recognizer) {
  return new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
}
|
||||
|
||||
/// Feed audio samples into an offline stream.
///
/// The caller's buffer is forwarded directly; the intermediate copy of the
/// whole buffer was unnecessary.
///
/// @param stream       A pointer returned by CreateOfflineStream().
/// @param sample_rate  Sample rate of `samples`.
/// @param samples      Pointer to `samples_size` audio samples.
/// @param samples_size Number of elements in `samples`.
void __stdcall AcceptWaveform(
    SherpaOnnxOfflineStream* stream,
    int32_t sample_rate,
    const float* samples, int32_t samples_size) {
  stream->impl->AcceptWaveform(sample_rate, samples, samples_size);
}
|
||||
|
||||
/// Decode a single offline stream.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param stream     A pointer returned by CreateOfflineStream().
void __stdcall DecodeOfflineStream(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream* stream) {
  sherpa_onnx::OfflineStream* raw_stream = stream->impl.get();
  recognizer->impl->DecodeStream(raw_stream);
}
|
||||
|
||||
/// Decode several offline streams with one call.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param streams    Array of `n` pointers returned by CreateOfflineStream().
/// @param n          Number of elements in `streams`.
void __stdcall DecodeMultipleOfflineStreams(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream** streams, int32_t n) {
  // Unwrap each handle into the raw stream pointer the recognizer expects.
  std::vector<sherpa_onnx::OfflineStream*> raw_streams(n);
  int32_t index = 0;
  for (auto& slot : raw_streams) {
    slot = streams[index++]->impl.get();
  }
  recognizer->impl->DecodeStreams(raw_streams.data(), n);
}
|
||||
|
||||
/// Copy the recognition result of a stream into a newly allocated C struct.
///
/// The text is copied into a NUL-terminated buffer allocated with new[].
///
/// @param stream A pointer returned by CreateOfflineStream().
/// @return A newly allocated result. Free it with
///         DestroyOfflineRecognizerResult().
SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
    SherpaOnnxOfflineStream* stream) {
  const sherpa_onnx::OfflineRecognitionResult decoded =
      stream->impl->GetResult();
  const auto length = decoded.text.size();

  auto* out = new SherpaOnnxOfflineRecognizerResult;
  char* buffer = new char[length + 1];
  std::copy(decoded.text.begin(), decoded.text.end(), buffer);
  buffer[length] = 0;

  out->text = buffer;
  out->text_len = length;
  return out;
}
|
||||
|
||||
|
||||
/// Free a pointer returned by CreateOfflineRecognizer()
|
||||
///
|
||||
/// @param p A pointer returned by CreateOfflineRecognizer()
|
||||
void __stdcall DestroyOfflineRecognizer(
|
||||
SherpaOnnxOfflineRecognizer* recognizer) {
|
||||
delete recognizer->impl;
|
||||
delete recognizer;
|
||||
}
|
||||
|
||||
/// Destory an offline stream.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOfflineStream()
|
||||
void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) {
|
||||
delete stream;
|
||||
}
|
||||
|
||||
/// Destroy the pointer returned by GetOfflineStreamResult().
|
||||
///
|
||||
/// @param r A pointer returned by GetOfflineStreamResult()
|
||||
void __stdcall DestroyOfflineRecognizerResult(
|
||||
SherpaOnnxOfflineRecognizerResult* r) {
|
||||
delete r->text;
|
||||
delete r;
|
||||
}
|
||||
}// namespace sherpa_onnx
|
||||
@@ -1,122 +0,0 @@
|
||||
// sherpa-onnx/sharp-api/offline-api.h
|
||||
//
|
||||
// Copyright (c) 2023 Manyeyes Corporation
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <list>
|
||||
|
||||
namespace sherpa_onnx
|
||||
{
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
typedef struct SherpaOnnxOfflineTransducer {
|
||||
const char* encoder_filename;
|
||||
const char* decoder_filename;
|
||||
const char* joiner_filename;
|
||||
} SherpaOnnxOfflineTransducer;
|
||||
|
||||
typedef struct SherpaOnnxOfflineParaformer {
|
||||
const char* model;
|
||||
}SherpaOnnxOfflineParaformer;
|
||||
|
||||
typedef struct SherpaOnnxOfflineNemoEncDecCtc {
|
||||
const char* model;
|
||||
}SherpaOnnxOfflineNemoEncDecCtc;
|
||||
|
||||
|
||||
typedef struct SherpaOnnxOfflineModelConfig {
|
||||
SherpaOnnxOfflineTransducer transducer;
|
||||
SherpaOnnxOfflineParaformer paraformer;
|
||||
SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
|
||||
const char* tokens;
|
||||
const int32_t num_threads;
|
||||
const bool debug;
|
||||
} SherpaOnnxOfflineModelConfig;
|
||||
|
||||
/// It expects 16 kHz 16-bit single channel wave format.
|
||||
typedef struct SherpaOnnxFeatureConfig {
|
||||
/// Sample rate of the input data. MUST match the one expected
|
||||
/// by the model. For instance, it should be 16000 for models provided
|
||||
/// by us.
|
||||
int32_t sample_rate;
|
||||
|
||||
/// Feature dimension of the model.
|
||||
/// For instance, it should be 80 for models provided by us.
|
||||
int32_t feature_dim;
|
||||
} SherpaOnnxFeatureConfig;
|
||||
|
||||
typedef struct SherpaOnnxOfflineRecognizerConfig {
|
||||
SherpaOnnxFeatureConfig feat_config;
|
||||
SherpaOnnxOfflineModelConfig model_config;
|
||||
|
||||
/// Possible values are: greedy_search, modified_beam_search
|
||||
const char* decoding_method;
|
||||
|
||||
} SherpaOnnxOfflineRecognizerConfig;
|
||||
|
||||
typedef struct SherpaOnnxOfflineRecognizerResult {
|
||||
// Recognition results.
|
||||
// For English, it consists of space separated words.
|
||||
// For Chinese, it consists of Chinese words without spaces.
|
||||
char* text;
|
||||
int text_len;
|
||||
|
||||
// Decoded results at the token level.
|
||||
// For instance, for BPE-based models it consists of a list of BPE tokens.
|
||||
// std::vector<std::string> tokens;
|
||||
|
||||
// timestamps.size() == tokens.size()
|
||||
// timestamps[i] records the time in seconds when tokens[i] is decoded.
|
||||
// std::vector<float> timestamps;
|
||||
} SherpaOnnxOfflineRecognizerResult;
|
||||
|
||||
/// Note: OfflineRecognizer here means a non-streaming recognizer.
|
||||
/// It does not need to access the Internet during recognition.
|
||||
/// Everything is run locally.
|
||||
typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;
|
||||
|
||||
typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer(
|
||||
const SherpaOnnxOfflineRecognizerConfig * config);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOfflineStream * __stdcall CreateOfflineStream(
|
||||
SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall AcceptWaveform(
|
||||
SherpaOnnxOfflineStream * stream, int32_t sample_rate,
|
||||
const float* samples, int32_t samples_size);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DecodeOfflineStream(
|
||||
SherpaOnnxOfflineRecognizer * recognizer,
|
||||
SherpaOnnxOfflineStream * stream);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DecodeMultipleOfflineStreams(
|
||||
SherpaOnnxOfflineRecognizer * recognizer,
|
||||
SherpaOnnxOfflineStream * *streams, int32_t n);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult(
|
||||
SherpaOnnxOfflineStream * stream);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOfflineRecognizer(
|
||||
SherpaOnnxOfflineRecognizer * recognizer);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOfflineStream(
|
||||
SherpaOnnxOfflineStream * stream);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOfflineRecognizerResult(
|
||||
SherpaOnnxOfflineRecognizerResult * r);
|
||||
}// namespace sherpa_onnx
|
||||
@@ -1,148 +0,0 @@
|
||||
// sherpa-onnx/cpp-api/c-api.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "online-api.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../../sherpa-onnx/csrc/display.h"
|
||||
#include "../../sherpa-onnx/csrc/online-recognizer.h"
|
||||
namespace sherpa_onnx
|
||||
{
|
||||
struct SherpaOnnxOnlineRecognizer {
|
||||
sherpa_onnx::OnlineRecognizer* impl;
|
||||
};
|
||||
|
||||
struct SherpaOnnxOnlineStream {
|
||||
std::unique_ptr<sherpa_onnx::OnlineStream> impl;
|
||||
explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p)
|
||||
: impl(std::move(p)) {}
|
||||
};
|
||||
|
||||
struct SherpaOnnxDisplay {
|
||||
std::unique_ptr<sherpa_onnx::Display> impl;
|
||||
};
|
||||
|
||||
/// Build an online recognizer from the flat interop configuration.
///
/// @param config Config for the recognizer.
/// @return A newly allocated recognizer handle. Free it with
///         DestroyOnlineRecognizer().
SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(
    const SherpaOnnxOnlineRecognizerConfig* config) {
  sherpa_onnx::OnlineRecognizerConfig cfg;

  // Feature extraction settings.
  auto& feat = cfg.feat_config;
  feat.sampling_rate = config->feat_config.sample_rate;
  feat.feature_dim = config->feat_config.feature_dim;

  // Model files and runtime options.
  const auto& src_model = config->model_config;
  auto& model = cfg.model_config;
  model.encoder_filename = src_model.transducer.encoder;
  model.decoder_filename = src_model.transducer.decoder;
  model.joiner_filename = src_model.transducer.joiner;
  model.tokens = src_model.tokens;
  model.num_threads = src_model.num_threads;
  model.debug = src_model.debug;

  // Decoding options.
  cfg.decoding_method = config->decoding_method;
  cfg.max_active_paths = config->max_active_paths;

  // Endpoint detection.
  cfg.enable_endpoint = config->enable_endpoint;
  auto& endpoint = cfg.endpoint_config;
  endpoint.rule1.min_trailing_silence = config->rule1_min_trailing_silence;
  endpoint.rule2.min_trailing_silence = config->rule2_min_trailing_silence;
  endpoint.rule3.min_utterance_length = config->rule3_min_utterance_length;

  auto* recognizer = new SherpaOnnxOnlineRecognizer;
  recognizer->impl = new sherpa_onnx::OnlineRecognizer(cfg);
  return recognizer;
}
|
||||
|
||||
/// Free a pointer returned by CreateOnlineRecognizer()
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
///                   Passing a null pointer is a no-op.
void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) {
  // Guard against a null handle: dereferencing it to reach impl would crash.
  if (recognizer == nullptr) {
    return;
  }
  // impl is a raw owning pointer, so release it before the handle itself.
  delete recognizer->impl;
  delete recognizer;
}
|
||||
|
||||
/// Create an online stream for accepting wave samples.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @return A newly allocated stream handle. Free it with
///         DestroyOnlineStream().
SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(
    const SherpaOnnxOnlineRecognizer* recognizer) {
  auto created = recognizer->impl->CreateStream();
  // Ownership of the new stream moves into the handle.
  return new SherpaOnnxOnlineStream(std::move(created));
}
|
||||
|
||||
/// Destroy an online stream.
///
/// @param stream A pointer returned by CreateOnlineStream()
void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) {
  // The handle owns its stream through a unique_ptr, so deleting the handle
  // releases both.
  delete stream;
}
|
||||
|
||||
/// Feed audio samples to an online stream.
///
/// @param stream A pointer returned by CreateOnlineStream().
/// @param sample_rate Sample rate of the input samples.
/// @param samples A pointer to a 1-D array containing audio samples.
/// @param n Number of elements in the samples array.
void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream,
                                    int32_t sample_rate,
                                    const float* samples, int32_t n) {
  auto* raw_stream = stream->impl.get();
  raw_stream->AcceptWaveform(sample_rate, samples, n);
}
|
||||
|
||||
/// Report whether a stream is ready for decoding.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param stream A pointer returned by CreateOnlineStream()
/// @return The recognizer's IsReady() answer for this stream, converted to
///         int32_t.
int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
                                      SherpaOnnxOnlineStream* stream) {
  auto* raw_stream = stream->impl.get();
  const auto ready = recognizer->impl->IsReady(raw_stream);
  return ready;
}
|
||||
|
||||
/// Run decoding on a single online stream.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param stream A pointer returned by CreateOnlineStream()
void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
                                  SherpaOnnxOnlineStream* stream) {
  auto* raw_stream = stream->impl.get();
  recognizer->impl->DecodeStream(raw_stream);
}
|
||||
|
||||
/// Decode several online streams with one call.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param streams An array of n pointers returned by CreateOnlineStream()
/// @param n Number of elements in the given streams array.
void __stdcall DecodeMultipleOnlineStreams(
    SherpaOnnxOnlineRecognizer* recognizer,
    SherpaOnnxOnlineStream** streams, int32_t n) {
  // Collect the underlying stream pointers into a contiguous array, which is
  // what DecodeStreams() takes.
  std::vector<sherpa_onnx::OnlineStream*> raw_streams(n);
  int32_t idx = 0;
  for (auto& slot : raw_streams) {
    slot = streams[idx++]->impl.get();
  }
  recognizer->impl->DecodeStreams(raw_streams.data(), n);
}
|
||||
|
||||
/// Get the decoding results so far for an online stream.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
/// @param stream A pointer returned by CreateOnlineStream().
/// @return A newly allocated result whose text field is a NUL-terminated
///         copy of the recognized text and whose text_len is the text size
///         without the terminator. Free it with
///         DestroyOnlineRecognizerResult().
SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(
    SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) {
  sherpa_onnx::OnlineRecognizerResult decoded =
      recognizer->impl->GetResult(stream->impl.get());
  const auto& text = decoded.text;
  const auto length = text.size();

  auto* r = new SherpaOnnxOnlineRecognizerResult;

  // One extra byte for the trailing NUL.
  char* buffer = new char[length + 1];
  std::copy(text.begin(), text.end(), buffer);
  buffer[length] = 0;

  r->text = buffer;
  r->text_len = length;
  return r;
}
|
||||
|
||||
/// Destroy the pointer returned by GetOnlineStreamResult().
///
/// @param r A pointer returned by GetOnlineStreamResult(). Passing a null
///          pointer is a no-op.
void __stdcall DestroyOnlineRecognizerResult(
    const SherpaOnnxOnlineRecognizerResult* r) {
  // Guard against a null handle: dereferencing it to reach text would crash.
  if (r == nullptr) {
    return;
  }
  // text is allocated with new char[] in GetOnlineStreamResult(), hence the
  // array form of delete.
  delete[] r->text;
  delete r;
}
|
||||
|
||||
/// Reset an online stream through its recognizer.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
/// @param stream A pointer returned by CreateOnlineStream()
void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,
                     SherpaOnnxOnlineStream* stream) {
  auto* raw_stream = stream->impl.get();
  recognizer->impl->Reset(raw_stream);
}
|
||||
|
||||
/// Signal that no more audio samples will be provided for this stream.
///
/// @param stream A pointer returned by CreateOnlineStream()
void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) {
  auto* raw_stream = stream->impl.get();
  raw_stream->InputFinished();
}
|
||||
|
||||
/// Report whether an endpoint has been detected on a stream.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param stream A pointer returned by CreateOnlineStream()
/// @return The recognizer's IsEndpoint() answer for this stream, converted
///         to int32_t.
int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
                             SherpaOnnxOnlineStream* stream) {
  auto* raw_stream = stream->impl.get();
  const auto at_endpoint = recognizer->impl->IsEndpoint(raw_stream);
  return at_endpoint;
}
|
||||
|
||||
/// Create a display object.
///
/// @param max_word_per_line Forwarded to the sherpa_onnx::Display
///                          constructor.
/// @return A newly allocated display handle. Free it with DestroyDisplay().
SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) {
  auto* display = new SherpaOnnxDisplay;
  display->impl.reset(new sherpa_onnx::Display(max_word_per_line));
  return display;
}
|
||||
|
||||
/// Free a display object created by CreateDisplay().
///
/// @param display A pointer returned by CreateDisplay()
void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) {
  // The handle owns its Display through a unique_ptr, so deleting the handle
  // releases both.
  delete display;
}
|
||||
|
||||
/// Print the result.
///
/// @param display A pointer returned by CreateDisplay()
/// @param idx Forwarded unchanged to Display::Print().
/// @param s Forwarded unchanged to Display::Print().
void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx,
                               const char* s) {
  auto* printer = display->impl.get();
  printer->Print(idx, s);
}
|
||||
}
|
||||
@@ -1,238 +0,0 @@
|
||||
// sherpa-onnx/cpp-api/c-api.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
// C API for sherpa-onnx
|
||||
//
|
||||
// Please refer to
|
||||
// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
|
||||
// for usages.
|
||||
//
|
||||
|
||||
#ifndef SHERPA_ONNX_CPP_API_C_API_H_
|
||||
#define SHERPA_ONNX_CPP_API_C_API_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
namespace sherpa_onnx
|
||||
{
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
|
||||
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
|
||||
/// from there.
|
||||
typedef struct SherpaOnnxOnlineTransducer {
|
||||
const char* encoder;
|
||||
const char* decoder;
|
||||
const char* joiner;
|
||||
} SherpaOnnxOnlineTransducer;
|
||||
|
||||
typedef struct SherpaOnnxOnlineModelConfig
|
||||
{
|
||||
const SherpaOnnxOnlineTransducer transducer;
|
||||
const char* tokens;
|
||||
const int32_t num_threads;
|
||||
const bool debug; // true to print debug information of the model
|
||||
}SherpaOnnxOnlineModelConfig;
|
||||
|
||||
/// It expects 16 kHz 16-bit single channel wave format.
|
||||
typedef struct SherpaOnnxFeatureConfig {
|
||||
/// Sample rate of the input data. MUST match the one expected
|
||||
/// by the model. For instance, it should be 16000 for models provided
|
||||
/// by us.
|
||||
int32_t sample_rate;
|
||||
|
||||
/// Feature dimension of the model.
|
||||
/// For instance, it should be 80 for models provided by us.
|
||||
int32_t feature_dim;
|
||||
} SherpaOnnxFeatureConfig;
|
||||
|
||||
typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||
SherpaOnnxFeatureConfig feat_config;
|
||||
SherpaOnnxOnlineModelConfig model_config;
|
||||
|
||||
/// Possible values are: greedy_search, modified_beam_search
|
||||
const char* decoding_method;
|
||||
|
||||
/// Used only when decoding_method is modified_beam_search
|
||||
/// Example value: 4
|
||||
int32_t max_active_paths;
|
||||
|
||||
/// 0 to disable endpoint detection.
|
||||
/// A non-zero value to enable endpoint detection.
|
||||
int enable_endpoint;
|
||||
|
||||
/// An endpoint is detected if trailing silence in seconds is larger than
|
||||
/// this value even if nothing has been decoded.
|
||||
/// Used only when enable_endpoint is not 0.
|
||||
float rule1_min_trailing_silence;
|
||||
|
||||
/// An endpoint is detected if trailing silence in seconds is larger than
|
||||
/// this value after something that is not blank has been decoded.
|
||||
/// Used only when enable_endpoint is not 0.
|
||||
float rule2_min_trailing_silence;
|
||||
|
||||
/// An endpoint is detected if the utterance in seconds is larger than
|
||||
/// this value.
|
||||
/// Used only when enable_endpoint is not 0.
|
||||
float rule3_min_utterance_length;
|
||||
} SherpaOnnxOnlineRecognizerConfig;
|
||||
|
||||
typedef struct SherpaOnnxOnlineRecognizerResult {
|
||||
const char* text;
|
||||
int text_len;
|
||||
// TODO(fangjun): Add more fields
|
||||
} SherpaOnnxOnlineRecognizerResult;
|
||||
|
||||
/// Note: OnlineRecognizer here means StreamingRecognizer.
|
||||
/// It does not need to access the Internet during recognition.
|
||||
/// Everything is run locally.
|
||||
typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;
|
||||
typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
|
||||
|
||||
/// @param config Config for the recognizer.
|
||||
/// @return Return a pointer to the recognizer. The user has to invoke
|
||||
///         DestroyOnlineRecognizer() to free it to avoid memory leak.
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(
|
||||
const SherpaOnnxOnlineRecognizerConfig * config);
|
||||
|
||||
/// Free a pointer returned by CreateOnlineRecognizer()
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer);
|
||||
|
||||
/// Create an online stream for accepting wave samples.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
/// @return Return a pointer to an OnlineStream. The user has to invoke
|
||||
/// DestroyOnlineStream() to free it to avoid memory leak.
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(
|
||||
const SherpaOnnxOnlineRecognizer* recognizer);
|
||||
|
||||
/// Destroy an online stream.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// Accept input audio samples and compute the features.
|
||||
/// The user has to invoke DecodeOnlineStream() to run the neural network and
|
||||
/// decoding.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOnlineStream().
|
||||
/// @param sample_rate Sample rate of the input samples. If it is different
|
||||
/// from config.feat_config.sample_rate, we will do
|
||||
/// resampling inside sherpa-onnx.
|
||||
/// @param samples A pointer to a 1-D array containing audio samples.
|
||||
/// The range of samples has to be normalized to [-1, 1].
|
||||
/// @param n Number of elements in the samples array.
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,
|
||||
const float* samples, int32_t n);
|
||||
|
||||
/// Return 1 if there are enough number of feature frames for decoding.
|
||||
/// Return 0 otherwise.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer
|
||||
/// @param stream A pointer returned by CreateOnlineStream
|
||||
extern "C" __declspec(dllexport)
|
||||
int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
|
||||
SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// Call this function to run the neural network model and decoding.
|
||||
///
|
||||
/// Precondition for this function: IsOnlineStreamReady() MUST return 1.
|
||||
///
|
||||
/// Usage example:
|
||||
///
|
||||
/// while (IsOnlineStreamReady(recognizer, stream)) {
|
||||
/// DecodeOnlineStream(recognizer, stream);
|
||||
/// }
|
||||
///
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
|
||||
SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// This function is similar to DecodeOnlineStream(). It decodes multiple
|
||||
/// OnlineStream in parallel.
|
||||
///
|
||||
/// Caution: The caller has to ensure each OnlineStream is ready, i.e.,
|
||||
/// IsOnlineStreamReady() for that stream should return 1.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
/// @param streams A pointer array containing pointers returned by
|
||||
/// CreateOnlineStream()
|
||||
/// @param n Number of elements in the given streams array.
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,
|
||||
SherpaOnnxOnlineStream** streams, int32_t n);
|
||||
|
||||
/// Get the decoding results so far for an OnlineStream.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
|
||||
/// @param stream A pointer returned by CreateOnlineStream().
|
||||
/// @return A pointer containing the result. The user has to invoke
|
||||
/// DestroyOnlineRecognizerResult() to free the returned pointer to
|
||||
/// avoid memory leak.
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(
|
||||
SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// Destroy the pointer returned by GetOnlineStreamResult().
|
||||
///
|
||||
/// @param r A pointer returned by GetOnlineStreamResult()
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r);
|
||||
|
||||
/// Reset an OnlineStream , which clears the neural network model state
|
||||
/// and the state for decoding.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
|
||||
/// @param stream A pointer returned by CreateOnlineStream
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,
|
||||
SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// Signal that no more audio samples would be available.
|
||||
/// After this call, you cannot call AcceptOnlineWaveform() any more.
|
||||
///
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall InputFinished(SherpaOnnxOnlineStream* stream);
|
||||
|
||||
/// Return 1 if an endpoint has been detected.
|
||||
///
|
||||
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
|
||||
/// @param stream A pointer returned by CreateOnlineStream()
|
||||
/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
|
||||
extern "C" __declspec(dllexport)
|
||||
int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
|
||||
SherpaOnnxOnlineStream* stream);
|
||||
|
||||
// for displaying results on Linux/macOS.
|
||||
typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
|
||||
|
||||
/// Create a display object. Must be freed using DestroyDisplay to avoid
|
||||
/// memory leak.
|
||||
extern "C" __declspec(dllexport)
|
||||
SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line);
|
||||
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall DestroyDisplay(SherpaOnnxDisplay* display);
|
||||
|
||||
/// Print the result.
|
||||
extern "C" __declspec(dllexport)
|
||||
void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif // SHERPA_ONNX_CPP_API_C_API_H_
|
||||
Reference in New Issue
Block a user