diff --git a/.github/workflows/dot-net.yaml b/.github/workflows/dot-net.yaml new file mode 100644 index 00000000..12816515 --- /dev/null +++ b/.github/workflows/dot-net.yaml @@ -0,0 +1,135 @@ +name: dot-net + +on: + push: + branches: + - dot-net + tags: + - '*' + +concurrency: + group: dot-net-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-libs: + name: dot-net for ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v2 + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + uses: pypa/cibuildwheel@v2.11.4 + env: + CIBW_BEFORE_BUILD: "pip install -U cmake numpy" + CIBW_BUILD: "cp38-*64" + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" + CIBW_BUILD_VERBOSITY: 3 + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib' + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/*.whl + unzip -l ./wheelhouse/*.whl + + - uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.os }}-wheels + path: ./wheelhouse/*.whl + + build-nuget-packages: + name: build-nuget-packages + runs-on: ubuntu-latest + needs: build-libs + + steps: + - uses: actions/checkout@v2 + + - name: Retrieve artifact from ubuntu-latest + uses: actions/download-artifact@v2 + with: + name: ubuntu-latest-wheels + path: ./linux + + - name: Retrieve artifact from macos-latest + uses: actions/download-artifact@v2 + with: + name: macos-latest-wheels + path: ./macos + + - name: Retrieve artifact from windows-latest + uses: actions/download-artifact@v2 + with: + name: windows-latest-wheels + path: ./windows + + - name: Display wheels + shell: bash + run: | + tree . + + - name: Unzip Ubuntu wheels + shell: bash + run: | + cd linux + unzip ./*.whl + tree . 
+ + - name: Unzip macOS wheels + shell: bash + run: | + cd macos + unzip ./*.whl + tree . + + - name: Unzip Windows wheels + shell: bash + run: | + cd windows + unzip ./*.whl + cp -v ./*.dll sherpa_onnx/lib/ + tree . + + - name: Setup .NET Core 3.1 + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 3.1.x + + - name: Setup .NET 7.0 + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 7.0.x + + - name: Check dotnet + run: dotnet --info + + - name: build nuget packages + shell: bash + run: | + cd scripts/dotnet + ./run.sh + ls -lh packages + + - uses: actions/upload-artifact@v2 + name: upload nuget packages + with: + name: nuget-packages + path: scripts/dotnet/packages/*.nupkg + + - name: publish .Net packages to nuget.org + if: github.repository == 'csukuangfj/sherpa-onnx' || github.repository == 'k2-fsa/sherpa-onnx' + shell: bash + env: + API_KEY: ${{ secrets.NUGET_API_KEY }} + run: | + # API_KEY is valid until 2024.05.02 + cd scripts/dotnet/packages + dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key $API_KEY --source https://api.nuget.org/v3/index.json diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml new file mode 100644 index 00000000..2118d6c9 --- /dev/null +++ b/.github/workflows/test-dot-net.yaml @@ -0,0 +1,70 @@ +name: test-dot-net + +on: + push: + branches: + - master + paths: + - '.github/workflows/test-dot-net.yaml' + - 'dotnet-examples/**' + + pull_request: + branches: + - master + paths: + - '.github/workflows/test-dot-net.yaml' + - 'dotnet-examples/**' + + schedule: + # minute (0-59) + # hour (0-23) + # day of the month (1-31) + # month (1-12) + # day of the week (0-6) + # nightly build at 23:50 UTC time every day + - cron: "50 23 * * *" + +concurrency: + group: test-dot-net-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test-dot-net: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + + steps: 
+ - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup .NET Core 3.1 + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 3.1.x + + - name: Setup .NET 6.0 + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 6.0.x + + - name: Check dotnet + run: dotnet --info + + - name: Decode a file + shell: bash + run: | + cd dotnet-examples/ + cd online-decode-files + ./run.sh + + cd ../offline-decode-files + ./run-nemo-ctc.sh + ./run-paraformer.sh + ./run-zipformer.sh diff --git a/.gitignore b/.gitignore index 6971f74a..dc00d2e5 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ sherpa-onnx-nemo-ctc-en-citrinet-512 run-offline-decode-files-nemo-ctc.sh *.jar sherpa-onnx-nemo-ctc-* +*.wav diff --git a/CMakeLists.txt b/CMakeLists.txt index 5330ecfa..209f5e33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.4.1") +set(SHERPA_ONNX_VERSION "1.4.2") # Disable warning about # @@ -37,16 +37,12 @@ endif() set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) -if(BUILD_SHARED_LIBS AND MSVC) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -endif() - if(NOT CMAKE_BUILD_TYPE) message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") set(CMAKE_BUILD_TYPE Release) endif() -if(DEFINED ANDROID_ABI) +if(DEFINED ANDROID_ABI AND NOT SHERPA_ONNX_ENABLE_JNI) message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android") set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE) endif() @@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) endif() +if(BUILD_SHARED_LIBS AND MSVC) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() + message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") diff --git a/cmake/cmake_extension.py 
b/cmake/cmake_extension.py index 2c80fe7e..0396e966 100644 --- a/cmake/cmake_extension.py +++ b/cmake/cmake_extension.py @@ -41,7 +41,6 @@ try: # -linux_x86_64.whl self.root_is_pure = False - except ImportError: bdist_wheel = None @@ -78,7 +77,6 @@ class BuildExtension(build_ext): extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF " extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON " extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON " - extra_cmake_args += " -DSHERPA_ONNX_ENABLE_C_API=OFF " extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON " if "PYTHON_EXECUTABLE" not in cmake_args: diff --git a/csharp-api-examples/OfflineDecodeFiles.cs b/csharp-api-examples/OfflineDecodeFiles.cs deleted file mode 100644 index 177200a4..00000000 --- a/csharp-api-examples/OfflineDecodeFiles.cs +++ /dev/null @@ -1,255 +0,0 @@ -// See https://aka.ms/new-console-template for more information -// Copyright (c) 2023 by manyeyes -using SherpaOnnx; -/// Please refer to -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -/// to download pre-trained models. That is, you can find encoder-xxx.onnx -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct -/// from there. 
- -/// download model eg: -/// (The directory where the application runs) -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory -/// cd /path/to -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 -/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512 - -/// NuGet for sherpa-onnx -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx - -// transducer Usage: -/* - .\SherpaOnnx.Examples.exe ` - --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav - */ - -// paraformer Usage: -/* - .\SherpaOnnx.Examples.exe ` - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav - */ - -// paraformer Usage: -/* - .\SherpaOnnx.Examples.exe ` - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav - */ - - -internal class OfflineDecodeFiles -{ - static void Main(string[] args) - { - string usage = @" ------------------------------ -transducer Usage: - 
--tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav - -paraformer Usage: - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav - -nemo Usage: - --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` - --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` - --num-threads=2 ` - --decoding-method=greedy_search ` - --debug=false ` - ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav ------------------------------ -"; - if (args.Length == 0) - { - System.Console.WriteLine("Please enter the correct parameters:"); - System.Console.WriteLine(usage); - System.Text.StringBuilder sb = new System.Text.StringBuilder(); - //args = Console.ReadLine().Split(" "); - while (true) - { - string input = Console.ReadLine(); - sb.AppendLine(input); - if (Console.ReadKey().Key == ConsoleKey.Enter) - break; - } - args = sb.ToString().Split("\r\n"); - } - Console.WriteLine("Started!\n"); - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; - List wavFiles = new List(); - Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; - string encoder = argsDict.ContainsKey("encoder") ? 
Path.Combine(applicationBase, argsDict["encoder"]) : ""; - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; - - OfflineTransducer offlineTransducer = new OfflineTransducer(); - offlineTransducer.EncoderFilename = encoder; - offlineTransducer.DecoderFilename = decoder; - offlineTransducer.JoinerFilename = joiner; - - OfflineParaformer offlineParaformer = new OfflineParaformer(); - offlineParaformer.Model = paraformer; - - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); - offlineNemoEncDecCtc.Model = nemo_ctc; - - int numThreads = 0; - int.TryParse(num_threads, out numThreads); - bool isDebug = false; - bool.TryParse(debug, out isDebug); - - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? 
"" : decoding_method; - - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) - && string.IsNullOrEmpty(paraformer) - && string.IsNullOrEmpty(nemo_ctc)) - { - Console.WriteLine("Please specify at least one model"); - Console.WriteLine(usage); - } - // batch decode - TimeSpan total_duration = TimeSpan.Zero; - TimeSpan start_time = TimeSpan.Zero; - TimeSpan end_time = TimeSpan.Zero; - List results = new List(); - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) - { - OfflineRecognizer offlineRecognizer = new OfflineRecognizer( - offlineTransducer, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - List samplesList = new List(); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); - samplesList.Add(samples); - total_duration += duration; - } - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); - start_time = new TimeSpan(DateTime.Now.Ticks); - offlineRecognizer.DecodeMultipleOfflineStreams(streams); - results = offlineRecognizer.GetResults(streams); - end_time = new TimeSpan(DateTime.Now.Ticks); - } - else if (!string.IsNullOrEmpty(paraformer)) - { - OfflineRecognizer offlineRecognizer = new OfflineRecognizer( - offlineParaformer, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - List samplesList = new List(); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); - samplesList.Add(samples); - total_duration += duration; - } - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); - start_time = new TimeSpan(DateTime.Now.Ticks); - offlineRecognizer.DecodeMultipleOfflineStreams(streams); - results = offlineRecognizer.GetResults(streams); - 
end_time = new TimeSpan(DateTime.Now.Ticks); - } - else if (!string.IsNullOrEmpty(nemo_ctc)) - { - OfflineRecognizer offlineRecognizer = new OfflineRecognizer( - offlineNemoEncDecCtc, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - List samplesList = new List(); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); - samplesList.Add(samples); - total_duration += duration; - } - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); - start_time = new TimeSpan(DateTime.Now.Ticks); - offlineRecognizer.DecodeMultipleOfflineStreams(streams); - results = offlineRecognizer.GetResults(streams); - end_time = new TimeSpan(DateTime.Now.Ticks); - } - - foreach (var item in results.Zip(wavFiles)) - { - Console.WriteLine("wavFile:{0}", item.Second); - Console.WriteLine("text:{0}", item.First.text.ToLower()); - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); - } - - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; - Console.WriteLine("num_threads:{0}", num_threads); - Console.WriteLine("decoding_method:{0}", decodingMethod); - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); - - Console.WriteLine("End!"); - } - - static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) - { - Dictionary argsDict = new Dictionary(); - foreach (string input in args) - { - string[] ss = input.Split("="); - if (ss.Length == 1) - { - if (!string.IsNullOrEmpty(ss[0])) - { - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); - } - } - else - { - 
argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); - } - } - return argsDict; - } -} \ No newline at end of file diff --git a/csharp-api-examples/OnlineDecodeFile.cs b/csharp-api-examples/OnlineDecodeFile.cs deleted file mode 100644 index 20a027e0..00000000 --- a/csharp-api-examples/OnlineDecodeFile.cs +++ /dev/null @@ -1,171 +0,0 @@ -// See https://aka.ms/new-console-template for more information -// Copyright (c) 2023 by manyeyes -using SherpaOnnx; -/// Please refer to -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -/// to download pre-trained models. That is, you can find encoder-xxx.onnx -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct -/// from there. - -/// download model eg: -/// (The directory where the application runs) -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory -/// cd /path/to -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 - -/// NuGet for sherpa-onnx -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx - -// transducer Usage: -/* - .\SherpaOnnx.Examples.exe ` - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=modified_beam_search ` - --debug=false ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav - */ - -internal class OnlineDecodeFile -{ - static void Main(string[] args) - { - string usage = @" ------------------------------ 
-transducer Usage: - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=modified_beam_search ` - --debug=false ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ------------------------------ -"; - if (args.Length == 0) - { - System.Console.WriteLine("Please enter the correct parameters:"); - System.Console.WriteLine(usage); - System.Text.StringBuilder sb = new System.Text.StringBuilder(); - //args = Console.ReadLine().Split(" "); - while (true) - { - string input = Console.ReadLine(); - sb.AppendLine(input); - if (Console.ReadKey().Key == ConsoleKey.Enter) - break; - } - args = sb.ToString().Split("\r\n"); - } - Console.WriteLine("Started!\n"); - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; - List wavFiles = new List(); - Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; - string tokens = argsDict.ContainsKey("tokens") ? 
Path.Combine(applicationBase, argsDict["tokens"]) : ""; - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; - - OfflineTransducer offlineTransducer = new OfflineTransducer(); - offlineTransducer.EncoderFilename = encoder; - offlineTransducer.DecoderFilename = decoder; - offlineTransducer.JoinerFilename = joiner; - - OfflineParaformer offlineParaformer = new OfflineParaformer(); - offlineParaformer.Model = paraformer; - - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); - offlineNemoEncDecCtc.Model = nemo_ctc; - - int numThreads = 0; - int.TryParse(num_threads, out numThreads); - bool isDebug = false; - bool.TryParse(debug, out isDebug); - - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; - - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) - && string.IsNullOrEmpty(paraformer) - && string.IsNullOrEmpty(nemo_ctc)) - { - Console.WriteLine("Please specify at least one model"); - Console.WriteLine(usage); - } - // batch decode - TimeSpan total_duration = TimeSpan.Zero; - TimeSpan start_time = TimeSpan.Zero; - TimeSpan end_time = TimeSpan.Zero; - List results = new List(); - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) - { - OnlineTransducer onlineTransducer = new OnlineTransducer(); - onlineTransducer.EncoderFilename = encoder; - onlineTransducer.DecoderFilename = decoder; - onlineTransducer.JoinerFilename = joiner; - //test online - OnlineRecognizer onlineRecognizer = new OnlineRecognizer( - onlineTransducer, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - List 
samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration); - OnlineStream stream = onlineRecognizer.CreateStream(); - start_time = new TimeSpan(DateTime.Now.Ticks); - for (int i = 0; i < samplesList.Count; i++) - { - onlineRecognizer.AcceptWaveForm(stream, 16000, samplesList[i]); - onlineRecognizer.DecodeStream(stream); - OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream); - Console.WriteLine(result_on.text); - } - total_duration += duration; - } - end_time = new TimeSpan(DateTime.Now.Ticks); - } - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; - Console.WriteLine("num_threads:{0}", num_threads); - Console.WriteLine("decoding_method:{0}", decodingMethod); - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); - - Console.WriteLine("End!"); - } - - static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) - { - Dictionary argsDict = new Dictionary(); - foreach (string input in args) - { - string[] ss = input.Split("="); - if (ss.Length == 1) - { - if (!string.IsNullOrEmpty(ss[0])) - { - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); - } - } - else - { - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); - } - } - return argsDict; - } - -} \ No newline at end of file diff --git a/csharp-api-examples/OnlineDecodeFiles.cs b/csharp-api-examples/OnlineDecodeFiles.cs deleted file mode 100644 index 66f5492f..00000000 --- a/csharp-api-examples/OnlineDecodeFiles.cs +++ /dev/null @@ -1,221 +0,0 @@ -// See https://aka.ms/new-console-template for more information -// Copyright (c) 2023 by manyeyes -using 
SherpaOnnx; -/// Please refer to -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html -/// to download pre-trained models. That is, you can find encoder-xxx.onnx -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct -/// from there. - -/// download model eg: -/// (The directory where the application runs) -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory -/// cd /path/to -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 - -/// NuGet for sherpa-onnx -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx - -// transducer Usage: -/* - .\SherpaOnnx.Examples.exe ` - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=modified_beam_search ` - --debug=false ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav - */ - -internal class OnlineDecodeFiles -{ - static void Main(string[] args) - { - string usage = @" ------------------------------ -transducer Usage: - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` - 
--joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` - --num-threads=2 ` - --decoding-method=modified_beam_search ` - --debug=false ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav ------------------------------ -"; - if (args.Length == 0) - { - System.Console.WriteLine("Please enter the correct parameters:"); - System.Console.WriteLine(usage); - System.Text.StringBuilder sb = new System.Text.StringBuilder(); - //args = Console.ReadLine().Split(" "); - while (true) - { - string input = Console.ReadLine(); - sb.AppendLine(input); - if (Console.ReadKey().Key == ConsoleKey.Enter) - break; - } - args = sb.ToString().Split("\r\n"); - } - Console.WriteLine("Started!\n"); - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; - List wavFiles = new List(); - Dictionary argsDict = GetDict(args, applicationBase, ref wavFiles); - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; - string debug = argsDict.ContainsKey("debug") ? 
argsDict["debug"] : ""; - - OfflineTransducer offlineTransducer = new OfflineTransducer(); - offlineTransducer.EncoderFilename = encoder; - offlineTransducer.DecoderFilename = decoder; - offlineTransducer.JoinerFilename = joiner; - - OfflineParaformer offlineParaformer = new OfflineParaformer(); - offlineParaformer.Model = paraformer; - - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); - offlineNemoEncDecCtc.Model = nemo_ctc; - - int numThreads = 0; - int.TryParse(num_threads, out numThreads); - bool isDebug = false; - bool.TryParse(debug, out isDebug); - - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; - - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) - && string.IsNullOrEmpty(paraformer) - && string.IsNullOrEmpty(nemo_ctc)) - { - Console.WriteLine("Please specify at least one model"); - Console.WriteLine(usage); - } - // batch decode - TimeSpan total_duration = TimeSpan.Zero; - TimeSpan start_time = TimeSpan.Zero; - TimeSpan end_time = TimeSpan.Zero; - List results = new List(); - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) - { - OnlineTransducer onlineTransducer = new OnlineTransducer(); - onlineTransducer.EncoderFilename = encoder; - onlineTransducer.DecoderFilename = decoder; - onlineTransducer.JoinerFilename = joiner; - //test online - OnlineRecognizer onlineRecognizer = new OnlineRecognizer( - onlineTransducer, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - List samplesList = new List(); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); - samplesList.Add(samples); - total_duration += duration; - } - start_time = new TimeSpan(DateTime.Now.Ticks); - List streams = new List(); - foreach (float[] samples in samplesList) - { - OnlineStream 
stream = onlineRecognizer.CreateStream(); - onlineRecognizer.AcceptWaveForm(stream, 16000, samples); - streams.Add(stream); - onlineRecognizer.InputFinished(stream); - } - onlineRecognizer.DecodeMultipleStreams(streams); - results = onlineRecognizer.GetResults(streams); - foreach (OnlineRecognizerResultEntity result in results) - { - Console.WriteLine(result.text); - } - end_time = new TimeSpan(DateTime.Now.Ticks); - } - - - foreach (var item in results.Zip(wavFiles)) - { - Console.WriteLine("wavFile:{0}", item.Second); - Console.WriteLine("text:{0}", item.First.text.ToLower()); - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); - } - - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; - Console.WriteLine("num_threads:{0}", num_threads); - Console.WriteLine("decoding_method:{0}", decodingMethod); - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); - - Console.WriteLine("End!"); - } - - public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List wavFiles, ref TimeSpan total_duration) - { - OnlineTransducer onlineTransducer = new OnlineTransducer(); - onlineTransducer.EncoderFilename = encoder; - onlineTransducer.DecoderFilename = decoder; - onlineTransducer.JoinerFilename = joiner; - //test online - OnlineRecognizer onlineRecognizer = new OnlineRecognizer( - onlineTransducer, - tokens, - num_threads: numThreads, - debug: isDebug, - decoding_method: decodingMethod); - List samplesList = new List(); - foreach (string wavFile in wavFiles) - { - TimeSpan duration = TimeSpan.Zero; - float[] samples = AudioHelper.GetFileSamples(wavFile, ref 
duration); - samplesList.Add(samples); - total_duration += duration; - } - TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks); - List streams = onlineRecognizer.CreateStreams(samplesList); - onlineRecognizer.DecodeMultipleStreams(streams); - List results = onlineRecognizer.GetResults(streams); - foreach (OnlineRecognizerResultEntity result in results) - { - Console.WriteLine(result.text); - } - TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks); - } - - static Dictionary GetDict(string[] args, string applicationBase, ref List wavFiles) - { - Dictionary argsDict = new Dictionary(); - foreach (string input in args) - { - string[] ss = input.Split("="); - if (ss.Length == 1) - { - if (!string.IsNullOrEmpty(ss[0])) - { - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); - } - } - else - { - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); - } - } - return argsDict; - } -} \ No newline at end of file diff --git a/csharp-api-examples/README.md b/csharp-api-examples/README.md deleted file mode 100644 index 2d2eded7..00000000 --- a/csharp-api-examples/README.md +++ /dev/null @@ -1,9 +0,0 @@ -#ProjectReference csharp-api -`` -The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api. -This C # API is cross platform and you can compile it yourself in Windows, Mac OS, and Linux environments. - ------------- -Alternatively, install sherpaonnx through nuget. 
-#NuGet for sherpa-onnx -PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx \ No newline at end of file diff --git a/csharp-api-examples/Utils/AudioHelper.cs b/csharp-api-examples/Utils/AudioHelper.cs deleted file mode 100644 index c70065c8..00000000 --- a/csharp-api-examples/Utils/AudioHelper.cs +++ /dev/null @@ -1,67 +0,0 @@ -using NAudio.Wave; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -/// -/// audio processing -/// Copyright (c) 2023 by manyeyes -/// -public class AudioHelper -{ - public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration) - { - if (!File.Exists(wavFilePath)) - { - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); - return new float[1]; - } - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); - byte[] datas = new byte[_audioFileReader.Length]; - _audioFileReader.Read(datas, 0, datas.Length); - duration = _audioFileReader.TotalTime; - float[] wavdata = new float[datas.Length / sizeof(float)]; - Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length); - return wavdata; - } - - public static List GetChunkSamplesList(string wavFilePath, ref TimeSpan duration) - { - List wavdatas = new List(); - if (!File.Exists(wavFilePath)) - { - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); - wavdatas.Add(new float[1]); - return wavdatas; - } - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); - byte[] datas = new byte[_audioFileReader.Length]; - int chunkSize = 16000;// datas.Length / sizeof(float); - int chunkNum = (int)Math.Ceiling((double)datas.Length / chunkSize); - for (int i = 0; i < chunkNum; i++) - { - int offset = 0; - int dataCount = 0; - if (Math.Abs(datas.Length - i * chunkSize) > chunkSize) - { - offset = i * chunkSize; - dataCount = chunkSize; - } - else - { - offset = i * chunkSize; - dataCount = datas.Length - i * 
chunkSize; - } - _audioFileReader.Read(datas, offset, dataCount); - duration += _audioFileReader.TotalTime; - float[] wavdata = new float[chunkSize / sizeof(float)]; - Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount); - wavdatas.Add(wavdata); - - } - return wavdatas; - } -} diff --git a/dotnet-examples/.editorconfig b/dotnet-examples/.editorconfig new file mode 100644 index 00000000..bdcd2a8b --- /dev/null +++ b/dotnet-examples/.editorconfig @@ -0,0 +1,13 @@ +# top-most EditorConfig file +root = true + +# Don't use tabs for indentation. +[*] +indent_style = space + +# Code files +[*.{cs,csx,vb,vbx}] +indent_size = 2 +insert_final_newline = true +charset = utf-8-bom +end_of_line = crlf diff --git a/dotnet-examples/.gitignore b/dotnet-examples/.gitignore new file mode 100644 index 00000000..1746e326 --- /dev/null +++ b/dotnet-examples/.gitignore @@ -0,0 +1,2 @@ +bin +obj diff --git a/dotnet-examples/offline-decode-files/Program.cs b/dotnet-examples/offline-decode-files/Program.cs new file mode 100644 index 00000000..7b729252 --- /dev/null +++ b/dotnet-examples/offline-decode-files/Program.cs @@ -0,0 +1,179 @@ +// Copyright (c) 2023 Xiaomi Corporation +// Copyright (c) 2023 by manyeyes +// +// This file shows how to use a non-streaming model to decode files +// Please refer to +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +// to download non-streaming models +using CommandLine.Text; +using CommandLine; +using SherpaOnnx; +using System.Collections.Generic; +using System; + +class OfflineDecodeFiles +{ + class Options + { + [Option(Required = false, HelpText = "Path to tokens.txt")] + public string Tokens { get; set; } + + [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")] + public string Encoder { get; set; } + + [Option(Required = false, HelpText = "Path to decoder.onnx. 
Used only for transducer models")] + public string Decoder { get; set; } + + [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")] + public string Joiner { get; set; } + + [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] + public string Paraformer { get; set; } + + [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] + public string NeMoCtc { get; set; } + + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] + public int NumThreads { get; set; } + + [Option("decoding-method", Required = false, Default = "greedy_search", + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] + public string DecodingMethod { get; set; } + + // The help text must name the flag exactly as declared above: --decoding-method + [Option("max-active-paths", Required = false, Default = 4, + HelpText = @"Used only when --decoding-method is modified_beam_search. +It specifies number of active paths to keep during the search")] + public int MaxActivePaths { get; set; } + + [Option("files", Required = true, HelpText = "Audio files for decoding")] + public IEnumerable Files { get; set; } + } + + static void Main(string[] args) + { + var parser = new CommandLine.Parser(with => with.HelpWriter = null); + var parserResult = parser.ParseArguments(args); + + parserResult + .WithParsed(options => Run(options)) + .WithNotParsed(errs => DisplayHelp(parserResult, errs)); + } + + private static void DisplayHelp(ParserResult result, IEnumerable errs) + { + string usage = @" +# Zipformer + +dotnet run \ + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \ + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \ + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \ + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \ + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ + 
./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \ + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html +to download pre-trained non-streaming zipformer models. + +# Paraformer + +dotnet run \ + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \ + --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html +to download pre-trained paraformer models + +# NeMo CTC + +dotnet run \ + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ + --num-threads=1 \ + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \ + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \ + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html +to download pre-trained NeMo CTC models +"; + + // Show the usage examples as the heading, followed by the auto-generated option list / parse errors + var helpText = HelpText.AutoBuild(result, h => + { + h.AdditionalNewLineAfterOption = false; + h.Heading = usage; + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation"; + return HelpText.DefaultParsingErrorsHandler(result, h); + }, e => e); + Console.WriteLine(helpText); + } + + private static void Run(Options options) + { + OfflineRecognizerConfig config = new OfflineRecognizerConfig(); + config.ModelConfig.Tokens = options.Tokens; + + if (!String.IsNullOrEmpty(options.Encoder)) + { + // this is a transducer model + config.ModelConfig.Transducer.Encoder = options.Encoder; 
+ config.ModelConfig.Transducer.Decoder = options.Decoder; + config.ModelConfig.Transducer.Joiner = options.Joiner; + } + else if (!String.IsNullOrEmpty(options.Paraformer)) + { + config.ModelConfig.Paraformer.Model = options.Paraformer; + } + else if (!String.IsNullOrEmpty(options.NeMoCtc)) + { + config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; + } + else + { + Console.WriteLine("Please provide a model"); + return; + } + + config.DecodingMethod = options.DecodingMethod; + config.MaxActivePaths = options.MaxActivePaths; + // forward --num-threads; without this the parsed option is never used + config.ModelConfig.NumThreads = options.NumThreads; + config.ModelConfig.Debug = 0; + + OfflineRecognizer recognizer = new OfflineRecognizer(config); + + string[] files = options.Files.ToArray(); + + // We create a separate stream for each file + List streams = new List(); + streams.EnsureCapacity(files.Length); + + for (int i = 0; i != files.Length; ++i) + { + OfflineStream s = recognizer.CreateStream(); + + WaveReader waveReader = new WaveReader(files[i]); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + streams.Add(s); + } + + // all streams are handed to the recognizer in a single call + recognizer.Decode(streams); + + // display results + for (int i = 0; i != files.Length; ++i) + { + var text = streams[i].Result.Text; + Console.WriteLine("--------------------"); + Console.WriteLine(files[i]); + Console.WriteLine(text); + } + Console.WriteLine("--------------------"); + } +} diff --git a/dotnet-examples/offline-decode-files/WaveReader.cs b/dotnet-examples/offline-decode-files/WaveReader.cs new file mode 120000 index 00000000..bedfc634 --- /dev/null +++ b/dotnet-examples/offline-decode-files/WaveReader.cs @@ -0,0 +1 @@ +../online-decode-files/WaveReader.cs \ No newline at end of file diff --git a/csharp-api-examples/sherpa-onnx.csproj b/dotnet-examples/offline-decode-files/offline-decode-files.csproj similarity index 51% rename from csharp-api-examples/sherpa-onnx.csproj rename to dotnet-examples/offline-decode-files/offline-decode-files.csproj index c00f0948..336e0349 100644 --- a/csharp-api-examples/sherpa-onnx.csproj +++ 
b/dotnet-examples/offline-decode-files/offline-decode-files.csproj @@ -1,20 +1,16 @@ - - - - Exe - net6.0 - sherpa_onnx - enable - enable - OnlineDecodeFiles - - - - - - - - - - - + + + + Exe + net6.0 + offline_decode_files + enable + enable + + + + + + + + diff --git a/dotnet-examples/offline-decode-files/run-nemo-ctc.sh b/dotnet-examples/offline-decode-files/run-nemo-ctc.sh new file mode 100755 index 00000000..dcfcf4f4 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-nemo-ctc.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium + cd sherpa-onnx-nemo-ctc-en-conformer-medium + git lfs pull --include "*.onnx" + cd .. +fi + +dotnet run \ + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ + --num-threads=1 \ + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \ + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \ + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav diff --git a/dotnet-examples/offline-decode-files/run-paraformer.sh b/dotnet-examples/offline-decode-files/run-paraformer.sh new file mode 100755 index 00000000..9dff4574 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-paraformer.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 + cd sherpa-onnx-paraformer-zh-2023-03-28 + git lfs pull --include "*.onnx" + cd .. 
+fi + +dotnet run \ + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \ + --num-threads=2 \ + --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav diff --git a/dotnet-examples/offline-decode-files/run-zipformer.sh b/dotnet-examples/offline-decode-files/run-zipformer.sh new file mode 100755 index 00000000..0bfb3641 --- /dev/null +++ b/dotnet-examples/offline-decode-files/run-zipformer.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# +if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 + cd sherpa-onnx-zipformer-en-2023-04-01 + git lfs pull --include "*.onnx" + cd .. +fi + +dotnet run \ + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \ + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \ + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \ + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \ + --num-threads=2 \ + --decoding-method=modified_beam_search \ + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \ + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs new file mode 100644 index 00000000..f37ad2bc --- /dev/null +++ b/dotnet-examples/online-decode-files/Program.cs @@ -0,0 +1,181 @@ +// Copyright (c) 2023 Xiaomi Corporation +// Copyright (c) 2023 by manyeyes +// +// This file shows how to use a streaming model to decode files +// Please refer to +// 
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html +// to download streaming models + +using CommandLine.Text; +using CommandLine; +using SherpaOnnx; +using System.Collections.Generic; +using System.Linq; +using System; + +class OnlineDecodeFiles +{ + class Options + { + [Option(Required = true, HelpText = "Path to tokens.txt")] + public string Tokens { get; set; } + + [Option(Required = true, HelpText = "Path to encoder.onnx")] + public string Encoder { get; set; } + + [Option(Required = true, HelpText = "Path to decoder.onnx")] + public string Decoder { get; set; } + + [Option(Required = true, HelpText = "Path to joiner.onnx")] + public string Joiner { get; set; } + + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] + public int NumThreads { get; set; } + + [Option("decoding-method", Required = false, Default = "greedy_search", + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] + public string DecodingMethod { get; set; } + + [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] + public bool Debug { get; set; } + + [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] + public int SampleRate { get; set; } + + // The help text must name the flag exactly as declared above: --decoding-method + [Option("max-active-paths", Required = false, Default = 4, + HelpText = @"Used only when --decoding-method is modified_beam_search. +It specifies number of active paths to keep during the search")] + public int MaxActivePaths { get; set; } + + [Option("enable-endpoint", Required = false, Default = false, + HelpText = "True to enable endpoint detection.")] + public bool EnableEndpoint { get; set; } + + [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F, + HelpText = @"An endpoint is detected if trailing silence in seconds is +larger than this value even if nothing has been decoded. 
Used only when --enable-endpoint is true.")] + public float Rule1MinTrailingSilence { get; set; } + + [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F, + HelpText = @"An endpoint is detected if trailing silence in seconds is +larger than this value after something that is not blank has been decoded. Used +only when --enable-endpoint is true.")] + public float Rule2MinTrailingSilence { get; set; } + + [Option("rule3-min-utterance-length", Required = false, Default = 20.0F, + HelpText = @"An endpoint is detected if the utterance in seconds is +larger than this value. Used only when --enable-endpoint is true.")] + public float Rule3MinUtteranceLength { get; set; } + + [Option("files", Required = true, HelpText = "Audio files for decoding")] + public IEnumerable Files { get; set; } + + } + + static void Main(string[] args) + { + var parser = new CommandLine.Parser(with => with.HelpWriter = null); + var parserResult = parser.ParseArguments(args); + + parserResult + .WithParsed(options => Run(options)) + .WithNotParsed(errs => DisplayHelp(parserResult, errs)); + } + + private static void DisplayHelp(ParserResult result, IEnumerable errs) + { + // The example passes the wav files via --files because that option is declared Required = true + string usage = @" +dotnet run \ + --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ + --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \ + --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ + --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \ + --num-threads=2 \ + --decoding-method=modified_beam_search \ + --debug=false \ + --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \ + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav + +Please refer to +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html +to download pre-trained streaming models. 
+"; + + var helpText = HelpText.AutoBuild(result, h => + { + h.AdditionalNewLineAfterOption = false; + h.Heading = usage; + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation"; + return HelpText.DefaultParsingErrorsHandler(result, h); + }, e => e); + Console.WriteLine(helpText); + } + + private static void Run(Options options) + { + OnlineRecognizerConfig config = new OnlineRecognizerConfig(); + config.FeatConfig.SampleRate = options.SampleRate; + + // All models from icefall using feature dim 80. + // You can change it if your model has a different feature dim. + config.FeatConfig.FeatureDim = 80; + + config.TransducerModelConfig.Encoder = options.Encoder; + config.TransducerModelConfig.Decoder = options.Decoder; + config.TransducerModelConfig.Joiner = options.Joiner; + config.TransducerModelConfig.Tokens = options.Tokens; + config.TransducerModelConfig.NumThreads = options.NumThreads; + config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; + + config.DecodingMethod = options.DecodingMethod; + config.MaxActivePaths = options.MaxActivePaths; + config.EnableEndpoint = options.EnableEndpoint ? 
1 : 0; + + config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence; + config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; + config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; + + OnlineRecognizer recognizer = new OnlineRecognizer(config); + + string[] files = options.Files.ToArray(); + + // We create a separate stream for each file + List streams = new List(); + streams.EnsureCapacity(files.Length); + + for (int i = 0; i != files.Length; ++i) + { + OnlineStream s = recognizer.CreateStream(); + + WaveReader waveReader = new WaveReader(files[i]); + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); + + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; + s.AcceptWaveform(waveReader.SampleRate, tailPadding); + s.InputFinished(); + + streams.Add(s); + } + + while (true) + { + var readyStreams = streams.Where(s => recognizer.IsReady(s)); + if (!readyStreams.Any()) + { + break; + } + + recognizer.Decode(readyStreams); + } + + // display results + for (int i = 0; i != files.Length; ++i) + { + var text = recognizer.GetResult(streams[i]).Text; + Console.WriteLine("--------------------"); + Console.WriteLine(files[i]); + Console.WriteLine(text); + } + Console.WriteLine("--------------------"); + } +} diff --git a/dotnet-examples/online-decode-files/WaveReader.cs b/dotnet-examples/online-decode-files/WaveReader.cs new file mode 100644 index 00000000..1937b179 --- /dev/null +++ b/dotnet-examples/online-decode-files/WaveReader.cs @@ -0,0 +1,174 @@ +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) +using System; +using System.IO; + +using System.Runtime.InteropServices; + +namespace SherpaOnnx +{ + + [StructLayout(LayoutKind.Sequential)] + public struct WaveHeader + { + public Int32 ChunkID; + public Int32 ChunkSize; + public Int32 Format; + public Int32 SubChunk1ID; + public Int32 SubChunk1Size; + public Int16 AudioFormat; + public Int16 NumChannels; + public Int32 SampleRate; + public Int32 
ByteRate; + public Int16 BlockAlign; + public Int16 BitsPerSample; + public Int32 SubChunk2ID; + public Int32 SubChunk2Size; + + // Checks the header fields one by one; prints the first mismatch to the console and returns false. + // Returns true only when every check below passes. + public bool Validate() + { + // F F I R + if (ChunkID != 0x46464952) + { + Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"); + return false; + } + + // E V A W + if (Format != 0x45564157) + { + Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157"); + return false; + } + + // t m f + if (SubChunk1ID != 0x20746d66) + { + Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"); + return false; + } + + if (SubChunk1Size != 16) + { + Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"); + return false; + } + + if (AudioFormat != 1) + { + Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1"); + return false; + } + + if (NumChannels != 1) + { + Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1"); + return false; + } + + if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8)) + { + Console.WriteLine($"Invalid byte rate: {ByteRate}."); + return false; + } + + if (BlockAlign != (NumChannels * BitsPerSample / 8)) + { + // report the field that actually failed the check + Console.WriteLine($"Invalid block align: {BlockAlign}."); + return false; + } + + if (BitsPerSample != 16) + { // we support only 16 bits per sample + Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16"); + return false; + } + + return true; + } + } + + // It supports only 16-bit, single channel WAVE format. + // The sample rate can be any value. 
+ public class WaveReader + { + // Reads the whole file: parses and validates the header, skips chunks until the data chunk, + // then converts the 16-bit samples to floats. + // Throws ApplicationException if the file is missing, its header is truncated, or validation fails. + public WaveReader(String fileName) + { + if (!File.Exists(fileName)) + { + throw new ApplicationException($"{fileName} does not exist!"); + } + + // OpenRead asks for read access only, so read-only files can be decoded as well + using (var stream = File.OpenRead(fileName)) + { + using (var reader = new BinaryReader(stream)) + { + _header = ReadHeader(reader); + + if (!_header.Validate()) + { + throw new ApplicationException($"Invalid wave file {fileName}"); + } + + SkipMetaData(reader); + + // now read samples + // _header.SubChunk2Size contains number of bytes in total. + // we assume each sample is of type int16 + byte[] buffer = reader.ReadBytes(_header.SubChunk2Size); + + // ReadBytes may return fewer bytes than requested and the count may be odd: + // size the array from what was actually read and copy whole samples only + short[] samples_int16 = new short[buffer.Length / 2]; + Buffer.BlockCopy(buffer, 0, samples_int16, 0, samples_int16.Length * sizeof(short)); + + _samples = new float[samples_int16.Length]; + + for (var i = 0; i < samples_int16.Length; ++i) + { + _samples[i] = samples_int16[i] / 32768.0F; + } + } + } + } + + // Reads sizeof(WaveHeader) bytes from the reader and reinterprets them as a WaveHeader. + private static WaveHeader ReadHeader(BinaryReader reader) + { + int headerSize = Marshal.SizeOf(typeof(WaveHeader)); + byte[] bytes = reader.ReadBytes(headerSize); + + // ReadBytes returns a shorter array at end of stream; marshalling it would read past the buffer + if (bytes.Length != headerSize) + { + throw new ApplicationException($"Wave header is truncated: got {bytes.Length} bytes, expected {headerSize}"); + } + + GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); + try + { + return (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; + } + finally + { + // always unpin the buffer, even if marshalling throws + handle.Free(); + } + } + + // Seeks past chunks until one whose ID is 0x61746164 is found (or the stream ends), + // then stores that chunk's ID and size back into the header. + private void SkipMetaData(BinaryReader reader) + { + var bs = reader.BaseStream; + + Int32 subChunk2ID = _header.SubChunk2ID; + Int32 subChunk2Size = _header.SubChunk2Size; + + while (bs.Position != bs.Length && subChunk2ID != 0x61746164) + { + bs.Seek(subChunk2Size, SeekOrigin.Current); + subChunk2ID = reader.ReadInt32(); + subChunk2Size = reader.ReadInt32(); + } + _header.SubChunk2ID = subChunk2ID; + _header.SubChunk2Size = subChunk2Size; + } + + private WaveHeader _header; + + // Samples are normalized to the range [-1, 1] + private float[] _samples; + + public int SampleRate => _header.SampleRate; + public float[] Samples => _samples; + + public static void Test(String fileName) + { + WaveReader 
reader = new WaveReader(fileName); + Console.WriteLine($"samples length: {reader.Samples.Length}"); + Console.WriteLine($"samples rate: {reader.SampleRate}"); + } + } + +} diff --git a/dotnet-examples/online-decode-files/online-decode-files.csproj b/dotnet-examples/online-decode-files/online-decode-files.csproj new file mode 100644 index 00000000..f3f64089 --- /dev/null +++ b/dotnet-examples/online-decode-files/online-decode-files.csproj @@ -0,0 +1,16 @@ + + + + Exe + net6.0 + online_decode_files + enable + enable + + + + + + + + diff --git a/dotnet-examples/online-decode-files/run.sh b/dotnet-examples/online-decode-files/run.sh new file mode 100755 index 00000000..4023f1ad --- /dev/null +++ b/dotnet-examples/online-decode-files/run.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english +# to download the model files + +if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + git lfs pull --include "*.onnx" + cd .. 
+fi + +dotnet run -c Release \ + --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ + --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ + --decoding-method greedy_search \ + --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \ + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \ diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln new file mode 100644 index 00000000..66dac3c1 --- /dev/null +++ b/dotnet-examples/sherpa-onnx.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = 
Release|Any CPU + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/scripts/dotnet/.gitignore b/scripts/dotnet/.gitignore new file mode 100644 index 00000000..c82f824d --- /dev/null +++ b/scripts/dotnet/.gitignore @@ -0,0 +1,5 @@ +all +macos +linux +windows +packages diff --git a/scripts/dotnet/README.md b/scripts/dotnet/README.md new file mode 100644 index 00000000..f7dbcffd --- /dev/null +++ b/scripts/dotnet/README.md @@ -0,0 +1,17 @@ +# Introduction + +[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source +real-time speech recognition toolkit developed +by the Next-gen Kaldi team. + +It supports streaming recognition on a variety of +platforms such as Android, iOS, Raspberry Pi, Linux, Windows, macOS, etc. + +It does not require an Internet connection during recognition. + +See the documentation at https://k2-fsa.github.io/sherpa/onnx/index.html +for details. + +Please see +https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples +for how to use the C# APIs of this package. 
diff --git a/scripts/dotnet/generate.py b/scripts/dotnet/generate.py new file mode 100755 index 00000000..ffe83f5a --- /dev/null +++ b/scripts/dotnet/generate.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +# Copyright (c) 2023 Xiaomi Corporation + +import argparse +import re +from pathlib import Path + +import jinja2 + +SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent + + +def get_version(): + """Return the SHERPA_ONNX_VERSION value from the top-level CMakeLists.txt, without quotes.""" + cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt" + with open(cmake_file) as f: + content = f.read() + + match = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content) + if match is None: + # fail with a clear message instead of AttributeError on None + raise ValueError(f"SHERPA_ONNX_VERSION is not set in {cmake_file}") + return match.group(1).strip('"') + + +def read_proj_file(filename): + """Return the full text of the given template file.""" + with open(filename) as f: + return f.read() + + +def get_dict(): + """Return the template variables shared by every generated project file.""" + # read and parse CMakeLists.txt once and reuse the result + version = get_version() + return { + "version": version, + } + + +def process_linux(s): + libs = [ + "libkaldi-native-fbank-core.so", + "libonnxruntime.so.1.14.0", + "libsherpa-onnx-c-api.so", + "libsherpa-onnx-core.so", + ] + prefix = f"{SHERPA_ONNX_DIR}/linux/sherpa_onnx/lib/" + libs = [prefix + lib for lib in libs] + libs = "\n ;".join(libs) + + d = get_dict() + d["dotnet_rid"] = "linux-x64" + d["libs"] = libs + + environment = jinja2.Environment() + template = environment.from_string(s) + s = template.render(**d) + with open("./linux/sherpa-onnx.runtime.csproj", "w") as f: + f.write(s) + + +def process_macos(s): + libs = [ + "libkaldi-native-fbank-core.dylib", + "libonnxruntime.1.14.0.dylib", + "libsherpa-onnx-c-api.dylib", + "libsherpa-onnx-core.dylib", + ] + prefix = f"{SHERPA_ONNX_DIR}/macos/sherpa_onnx/lib/" + libs = [prefix + lib for lib in libs] + libs = "\n ;".join(libs) + + d = get_dict() + d["dotnet_rid"] = "osx-x64" + d["libs"] = libs + + environment = jinja2.Environment() + template = environment.from_string(s) + s = template.render(**d) + with open("./macos/sherpa-onnx.runtime.csproj", "w") as f: + f.write(s) + + +def process_windows(s): + libs = [ + "kaldi-native-fbank-core.dll", + "onnxruntime.dll", + "sherpa-onnx-c-api.dll", + 
"sherpa-onnx-core.dll", + ] + prefix = f"{SHERPA_ONNX_DIR}/windows/sherpa_onnx/lib/" + libs = [prefix + lib for lib in libs] + libs = "\n ;".join(libs) + + d = get_dict() + d["dotnet_rid"] = "win-x64" + d["libs"] = libs + + environment = jinja2.Environment() + template = environment.from_string(s) + s = template.render(**d) + with open("./windows/sherpa-onnx.runtime.csproj", "w") as f: + f.write(s) + + +def main(): + s = read_proj_file("./sherpa-onnx.csproj.runtime.in") + process_macos(s) + process_linux(s) + process_windows(s) + + s = read_proj_file("./sherpa-onnx.csproj.in") + d = get_dict() + d["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages") + + environment = jinja2.Environment() + template = environment.from_string(s) + s = template.render(**d) + with open("./all/sherpa-onnx.csproj", "w") as f: + f.write(s) + + +if __name__ == "__main__": + main() diff --git a/scripts/dotnet/offline.cs b/scripts/dotnet/offline.cs new file mode 100644 index 00000000..295f88f7 --- /dev/null +++ b/scripts/dotnet/offline.cs @@ -0,0 +1,259 @@ +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) +/// Copyright (c) 2023 by manyeyes + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System; + +namespace SherpaOnnx +{ + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineTransducerModelConfig + { + public OfflineTransducerModelConfig() + { + Encoder = ""; + Decoder = ""; + Joiner = ""; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Encoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Decoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Joiner; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineParaformerModelConfig + { + public OfflineParaformerModelConfig() + { + Model = ""; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Model; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineNemoEncDecCtcModelConfig + { + public 
OfflineNemoEncDecCtcModelConfig() + { + Model = ""; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Model; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineLMConfig + { + public OfflineLMConfig() + { + Model = ""; + Scale = 0.5F; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Model; + + public float Scale; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineModelConfig + { + public OfflineModelConfig() + { + Transducer = new OfflineTransducerModelConfig(); + Paraformer = new OfflineParaformerModelConfig(); + NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); + Tokens = ""; + NumThreads = 1; + Debug = 0; + } + public OfflineTransducerModelConfig Transducer; + public OfflineParaformerModelConfig Paraformer; + public OfflineNemoEncDecCtcModelConfig NeMoCtc; + + [MarshalAs(UnmanagedType.LPStr)] + public string Tokens; + + public int NumThreads; + + public int Debug; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OfflineRecognizerConfig + { + public OfflineRecognizerConfig() + { + FeatConfig = new FeatureConfig(); + ModelConfig = new OfflineModelConfig(); + LmConfig = new OfflineLMConfig(); + + DecodingMethod = "greedy_search"; + MaxActivePaths = 4; + + } + public FeatureConfig FeatConfig; + public OfflineModelConfig ModelConfig; + public OfflineLMConfig LmConfig; + + [MarshalAs(UnmanagedType.LPStr)] + public string DecodingMethod; + + public int MaxActivePaths; + } + + public class OfflineRecognizerResult + { + public OfflineRecognizerResult(IntPtr handle) + { + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); + _text = Marshal.PtrToStringUTF8(impl.Text); + } + + [StructLayout(LayoutKind.Sequential)] + struct Impl + { + public IntPtr Text; + } + + private String _text; + public String Text => _text; + } + + public class OfflineStream : IDisposable + { + public OfflineStream(IntPtr p) + { + _handle = new HandleRef(this, p); + } + + public void AcceptWaveform(int sampleRate, float[] samples) 
+ { + AcceptWaveform(Handle, sampleRate, samples, samples.Length); + } + + public OfflineRecognizerResult Result + { + get + { + IntPtr h = GetResult(_handle.Handle); + OfflineRecognizerResult result = new OfflineRecognizerResult(h); + DestroyResult(h); + return result; + } + } + + ~OfflineStream() + { + Cleanup(); + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + private void Cleanup() + { + DestroyOfflineStream(Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + public IntPtr Handle => _handle.Handle; + + [DllImport(Dll.Filename, EntryPoint = "DestoryOfflineStream")] // sic: the C API defines the symbol with this spelling + private static extern void DestroyOfflineStream(IntPtr handle); + + [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")] + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); + + [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")] + private static extern IntPtr GetResult(IntPtr handle); + + [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")] + private static extern void DestroyResult(IntPtr handle); + } + + public class OfflineRecognizer : IDisposable + { + public OfflineRecognizer(OfflineRecognizerConfig config) + { + IntPtr h = CreateOfflineRecognizer(ref config); + _handle = new HandleRef(this, h); + } + + public OfflineStream CreateStream() + { + IntPtr p = CreateOfflineStream(_handle.Handle); + return new OfflineStream(p); + } + + /// You have to ensure that IsReady(stream) returns true before + /// you call this method + public void Decode(OfflineStream stream) + { + Decode(_handle.Handle, stream.Handle); + } + + // The caller should ensure all passed streams are ready for decoding. 
+ public void Decode(IEnumerable<OfflineStream> streams) + { + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + Decode(_handle.Handle, ptrs, ptrs.Length); + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + ~OfflineRecognizer() + { + Cleanup(); + } + + private void Cleanup() + { + DestroyOfflineRecognizer(_handle.Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config); + + [DllImport(Dll.Filename)] + private static extern void DestroyOfflineRecognizer(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateOfflineStream(IntPtr handle); + + [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")] + private static extern void Decode(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")] + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); + } + +} diff --git a/scripts/dotnet/online.cs b/scripts/dotnet/online.cs new file mode 100644 index 00000000..a51b72e3 --- /dev/null +++ b/scripts/dotnet/online.cs @@ -0,0 +1,291 @@ +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) +/// Copyright (c) 2023 by manyeyes + +using System.Linq; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System; + +namespace SherpaOnnx +{ + internal static class Dll + { + public const string Filename = "sherpa-onnx-c-api"; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OnlineTransducerModelConfig + { + public OnlineTransducerModelConfig() + { + Encoder = ""; + Decoder = ""; + Joiner = ""; + Tokens = ""; + NumThreads = 1; + Debug = 0; + } + [MarshalAs(UnmanagedType.LPStr)] + public string Encoder; + + 
[MarshalAs(UnmanagedType.LPStr)] + public string Decoder; + + [MarshalAs(UnmanagedType.LPStr)] + public string Joiner; + + [MarshalAs(UnmanagedType.LPStr)] + public string Tokens; + + /// Number of threads used to run the neural network model + public int NumThreads; + + /// true to print debug information of the model + public int Debug; + } + + /// It expects 16 kHz 16-bit single channel wave format. + [StructLayout(LayoutKind.Sequential)] + public struct FeatureConfig + { + public FeatureConfig() + { + SampleRate = 16000; + FeatureDim = 80; + } + /// Sample rate of the input data. MUST match the one expected + /// by the model. For instance, it should be 16000 for models provided + /// by us. + public int SampleRate; + + /// Feature dimension of the model. + /// For instance, it should be 80 for models provided by us. + public int FeatureDim; + } + + [StructLayout(LayoutKind.Sequential)] + public struct OnlineRecognizerConfig + { + public OnlineRecognizerConfig() + { + FeatConfig = new FeatureConfig(); + TransducerModelConfig = new OnlineTransducerModelConfig(); + DecodingMethod = "greedy_search"; + MaxActivePaths = 4; + EnableEndpoint = 0; + Rule1MinTrailingSilence = 1.2F; + Rule2MinTrailingSilence = 2.4F; + Rule3MinUtteranceLength = 20.0F; + } + public FeatureConfig FeatConfig; + public OnlineTransducerModelConfig TransducerModelConfig; + + [MarshalAs(UnmanagedType.LPStr)] + public string DecodingMethod; + + /// Used only when decoding_method is modified_beam_search + /// Example value: 4 + public int MaxActivePaths; + + /// 0 to disable endpoint detection. + /// A non-zero value to enable endpoint detection. + public int EnableEndpoint; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value even if nothing has been decoded. + /// Used only when enable_endpoint is not 0. 
+ public float Rule1MinTrailingSilence; + + /// An endpoint is detected if trailing silence in seconds is larger than + /// this value after something that is not blank has been decoded. + /// Used only when enable_endpoint is not 0. + public float Rule2MinTrailingSilence; + + /// An endpoint is detected if the utterance in seconds is larger than + /// this value. + /// Used only when enable_endpoint is not 0. + public float Rule3MinUtteranceLength; + } + + public class OnlineRecognizerResult + { + public OnlineRecognizerResult(IntPtr handle) + { + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); + _text = Marshal.PtrToStringUTF8(impl.Text); + } + + [StructLayout(LayoutKind.Sequential)] + struct Impl + { + public IntPtr Text; + } + + private String _text; + public String Text => _text; + } + + public class OnlineStream : IDisposable + { + public OnlineStream(IntPtr p) + { + _handle = new HandleRef(this, p); + } + + public void AcceptWaveform(int sampleRate, float[] samples) + { + AcceptWaveform(Handle, sampleRate, samples, samples.Length); + } + + public void InputFinished() + { + InputFinished(Handle); + } + + ~OnlineStream() + { + Cleanup(); + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + private void Cleanup() + { + DestroyOnlineStream(Handle); + + // Don't permit the handle to be used again. 
+ _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + public IntPtr Handle => _handle.Handle; + + [DllImport(Dll.Filename, EntryPoint = "DestoryOnlineStream")] // sic: c-api.h declares the symbol with this spelling + private static extern void DestroyOnlineStream(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); + + [DllImport(Dll.Filename)] + private static extern void InputFinished(IntPtr handle); + } + + // please see + // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code + // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources + public class OnlineRecognizer : IDisposable + { + public OnlineRecognizer(OnlineRecognizerConfig config) + { + IntPtr h = CreateOnlineRecognizer(ref config); + _handle = new HandleRef(this, h); + } + + public OnlineStream CreateStream() + { + IntPtr p = CreateOnlineStream(_handle.Handle); + return new OnlineStream(p); + } + + /// Return true if the passed stream is ready for decoding. + public bool IsReady(OnlineStream stream) + { + return IsReady(_handle.Handle, stream.Handle) != 0; + } + + /// Return true if an endpoint is detected for this stream. + /// You probably need to invoke Reset(stream) when this method returns + /// true. + public bool IsEndpoint(OnlineStream stream) + { + return IsEndpoint(_handle.Handle, stream.Handle) != 0; + } + + /// You have to ensure that IsReady(stream) returns true before + /// you call this method + public void Decode(OnlineStream stream) + { + Decode(_handle.Handle, stream.Handle); + } + + // The caller should ensure all passed streams are ready for decoding. 
+ public void Decode(IEnumerable<OnlineStream> streams) + { + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); + Decode(_handle.Handle, ptrs, ptrs.Length); + } + + public OnlineRecognizerResult GetResult(OnlineStream stream) + { + IntPtr h = GetResult(_handle.Handle, stream.Handle); + OnlineRecognizerResult result = new OnlineRecognizerResult(h); + DestroyResult(h); + return result; + } + + /// When this method returns, IsEndpoint(stream) will return false. + public void Reset(OnlineStream stream) + { + Reset(_handle.Handle, stream.Handle); + } + + public void Dispose() + { + Cleanup(); + // Prevent the object from being placed on the + // finalization queue + System.GC.SuppressFinalize(this); + } + + ~OnlineRecognizer() + { + Cleanup(); + } + + private void Cleanup() + { + DestroyOnlineRecognizer(_handle.Handle); + + // Don't permit the handle to be used again. + _handle = new HandleRef(this, IntPtr.Zero); + } + + private HandleRef _handle; + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config); + + [DllImport(Dll.Filename)] + private static extern void DestroyOnlineRecognizer(IntPtr handle); + + [DllImport(Dll.Filename)] + private static extern IntPtr CreateOnlineStream(IntPtr handle); + + [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")] + private static extern int IsReady(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")] + private static extern void Decode(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")] + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); + + [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")] + private static extern IntPtr GetResult(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")] + private static extern void DestroyResult(IntPtr result); + + [DllImport(Dll.Filename)] + private 
static extern void Reset(IntPtr handle, IntPtr stream); + + [DllImport(Dll.Filename)] + private static extern int IsEndpoint(IntPtr handle, IntPtr stream); + } +} diff --git a/scripts/dotnet/run.sh b/scripts/dotnet/run.sh new file mode 100755 index 00000000..03fd7990 --- /dev/null +++ b/scripts/dotnet/run.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Copyright (c) 2023 Xiaomi Corporation + +set -ex + +mkdir -p macos linux windows all + +cp ./online.cs all +cp ./offline.cs all + +./generate.py + +pushd linux +dotnet build -c Release +dotnet pack -c Release -o ../packages +popd + +pushd macos +dotnet build -c Release +dotnet pack -c Release -o ../packages +popd + +pushd windows +dotnet build -c Release +dotnet pack -c Release -o ../packages +popd + +pushd all +dotnet build -c Release +dotnet pack -c Release -o ../packages +popd + +ls -lh packages diff --git a/scripts/dotnet/sherpa-onnx.csproj.in b/scripts/dotnet/sherpa-onnx.csproj.in new file mode 100644 index 00000000..2f40237e --- /dev/null +++ b/scripts/dotnet/sherpa-onnx.csproj.in @@ -0,0 +1,56 @@ + + + Apache-2.0 + README.md + Library + 10.0 + netstandard2.1;netcoreapp3.1;net6.0;net7.0 + linux-x64;osx-x64;win-x64 + true + sherpa-onnx + {{ version }} + + https://github.com/k2-fsa/sherpa-onnx + https://github.com/k2-fsa/sherpa-onnx + speech recognition voice audio stt asr speech-to-text AI offline + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx + + The Next-gen Kaldi development team + The Next-gen Kaldi development team + Xiaomi Corporation + Copyright 2019-2023 Xiaomi Corporation + sherpa-onnx is an open-source real-time speech recognition toolkit developed + by the Next-gen Kaldi team. It supports streaming recognition on a variety of + platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc. + + It does not require Internet connection during recognition. + + See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html + for details. 
+ + + + sherpa-onnx v{{ version }} + org.k2fsa.sherpa.onnx + + + false + false + false + + + + {{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json + + + + + + + + + + + + + diff --git a/scripts/dotnet/sherpa-onnx.csproj.runtime.in b/scripts/dotnet/sherpa-onnx.csproj.runtime.in new file mode 100644 index 00000000..f364f8aa --- /dev/null +++ b/scripts/dotnet/sherpa-onnx.csproj.runtime.in @@ -0,0 +1,50 @@ + + + Apache-2.0 + README.md + Library + netstandard2.0;netcoreapp3.1;net6.0 + {{ dotnet_rid }} + sherpa-onnx + {{ version }} + + https://github.com/k2-fsa/sherpa-onnx + https://github.com/k2-fsa/sherpa-onnx + speech recognition voice audio stt asr speech-to-text AI offline + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx + + + .NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project. + + In general, you don't need to use this package directly. + + Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead + + false + + + sherpa-onnx {{ dotnet_rid }} v{{ version }} + org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }} + + + false + false + false + + + + + + + + + + + runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension) + true + PreserveNewest + + + diff --git a/sherpa-onnx/c-api/CMakeLists.txt b/sherpa-onnx/c-api/CMakeLists.txt index 95f98d92..c0da8ca8 100644 --- a/sherpa-onnx/c-api/CMakeLists.txt +++ b/sherpa-onnx/c-api/CMakeLists.txt @@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR}) add_library(sherpa-onnx-c-api c-api.cc) target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core) +if(BUILD_SHARED_LIBS) + target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1) + target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_MAIN_LIB=1) +endif() + install(TARGETS sherpa-onnx-c-api DESTINATION lib) install(FILES c-api.h diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 022e2e76..45909bba 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ 
b/sherpa-onnx/c-api/c-api.cc @@ -10,10 +10,11 @@ #include #include "sherpa-onnx/csrc/display.h" +#include "sherpa-onnx/csrc/offline-recognizer.h" #include "sherpa-onnx/csrc/online-recognizer.h" struct SherpaOnnxOnlineRecognizer { - sherpa_onnx::OnlineRecognizer *impl; + std::unique_ptr impl; }; struct SherpaOnnxOnlineStream { @@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( recognizer_config.endpoint_config.rule3.min_utterance_length = config->rule3_min_utterance_length; + if (config->model_config.debug) { + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); + } + SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; - recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); + + recognizer->impl = + std::make_unique(recognizer_config); return recognizer; } void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) { - delete recognizer->impl; delete recognizer; } @@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; } void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) { display->impl->Print(idx, s); } + +// ============================================================ +// For offline ASR (i.e., non-streaming ASR) +// ============================================================ +// +struct SherpaOnnxOfflineRecognizer { + std::unique_ptr impl; +}; + +struct SherpaOnnxOfflineStream { + std::unique_ptr impl; + explicit SherpaOnnxOfflineStream( + std::unique_ptr p) + : impl(std::move(p)) {} +}; + +SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( + const SherpaOnnxOfflineRecognizerConfig *config) { + sherpa_onnx::OfflineRecognizerConfig recognizer_config; + + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; + + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; + + recognizer_config.model_config.transducer.encoder_filename = + config->model_config.transducer.encoder; + + 
recognizer_config.model_config.transducer.decoder_filename = + config->model_config.transducer.decoder; + + recognizer_config.model_config.transducer.joiner_filename = + config->model_config.transducer.joiner; + + recognizer_config.model_config.paraformer.model = + config->model_config.paraformer.model; + + recognizer_config.model_config.nemo_ctc.model = + config->model_config.nemo_ctc.model; + + recognizer_config.model_config.tokens = config->model_config.tokens; + recognizer_config.model_config.num_threads = config->model_config.num_threads; + recognizer_config.model_config.debug = config->model_config.debug; + + recognizer_config.lm_config.model = config->lm_config.model; + recognizer_config.lm_config.scale = config->lm_config.scale; + + recognizer_config.decoding_method = config->decoding_method; + recognizer_config.max_active_paths = config->max_active_paths; + + if (config->model_config.debug) { + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); + } + + SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; + + recognizer->impl = + std::make_unique(recognizer_config); + + return recognizer; +} + +void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer *recognizer) { + delete recognizer; +} + +SherpaOnnxOfflineStream *CreateOfflineStream( + const SherpaOnnxOfflineRecognizer *recognizer) { + SherpaOnnxOfflineStream *stream = + new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); + return stream; +} + +void DestoryOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; } + +void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate, + const float *samples, int32_t n) { + stream->impl->AcceptWaveform(sample_rate, samples, n); +} + +void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer, + SherpaOnnxOfflineStream *stream) { + recognizer->impl->DecodeStream(stream->impl.get()); +} + +void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer *recognizer, + 
SherpaOnnxOfflineStream **streams, + int32_t n) { + std::vector ss(n); + for (int32_t i = 0; i != n; ++i) { + ss[i] = streams[i]->impl.get(); + } + recognizer->impl->DecodeStreams(ss.data(), n); +} + +SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( + SherpaOnnxOfflineStream *stream) { + const sherpa_onnx::OfflineRecognitionResult &result = + stream->impl->GetResult(); + const auto &text = result.text; + + auto r = new SherpaOnnxOfflineRecognizerResult; + r->text = new char[text.size() + 1]; + std::copy(text.begin(), text.end(), const_cast(r->text)); + const_cast(r->text)[text.size()] = 0; + + return r; +} + +void DestroyOfflineRecognizerResult( + const SherpaOnnxOfflineRecognizerResult *r) { + delete[] r->text; + delete r; +} diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index e979d9aa..9678e346 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -18,12 +18,35 @@ extern "C" { #endif +// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h +// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in +// CMakeLists.txt + +#if defined(_WIN32) +#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS) +#define SHERPA_ONNX_EXPORT __declspec(dllexport) +#define SHERPA_ONNX_IMPORT __declspec(dllimport) +#else +#define SHERPA_ONNX_EXPORT +#define SHERPA_ONNX_IMPORT +#endif +#else // WIN32 +#define SHERPA_ONNX_EXPORT __attribute__((__visibility__("default"))) +#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT +#endif + +#if defined(SHERPA_ONNX_BUILD_MAIN_LIB) +#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT +#else +#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT +#endif + /// Please refer to /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html /// to download pre-trained models. That is, you can find encoder-xxx.onnx /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct /// from there. 
-typedef struct SherpaOnnxOnlineTransducerModelConfig { +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig { const char *encoder; const char *decoder; const char *joiner; @@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig { } SherpaOnnxOnlineTransducerModelConfig; /// It expects 16 kHz 16-bit single channel wave format. -typedef struct SherpaOnnxFeatureConfig { +SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { /// Sample rate of the input data. MUST match the one expected /// by the model. For instance, it should be 16000 for models provided /// by us. @@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig { int32_t feature_dim; } SherpaOnnxFeatureConfig; -typedef struct SherpaOnnxOnlineRecognizerConfig { +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { SherpaOnnxFeatureConfig feat_config; SherpaOnnxOnlineTransducerModelConfig model_config; @@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig { float rule3_min_utterance_length; } SherpaOnnxOnlineRecognizerConfig; -typedef struct SherpaOnnxOnlineRecognizerResult { +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { const char *text; // TODO(fangjun): Add more fields } SherpaOnnxOnlineRecognizerResult; @@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult { /// Note: OnlineRecognizer here means StreamingRecognizer. /// It does not need to access the Internet during recognition. /// Everything is run locally. -typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer; -typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizer + SherpaOnnxOnlineRecognizer; +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; -/// @param config Config for the recongizer. +/// @param config Config for the recognizer. /// @return Return a pointer to the recognizer. 
The user has to invoke // DestroyOnlineRecognizer() to free it to avoid memory leak. -SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( +SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( const SherpaOnnxOnlineRecognizerConfig *config); /// Free a pointer returned by CreateOnlineRecognizer() /// /// @param p A pointer returned by CreateOnlineRecognizer() -void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer); +SHERPA_ONNX_API void DestroyOnlineRecognizer( + SherpaOnnxOnlineRecognizer *recognizer); /// Create an online stream for accepting wave samples. /// /// @param recognizer A pointer returned by CreateOnlineRecognizer() /// @return Return a pointer to an OnlineStream. The user has to invoke /// DestoryOnlineStream() to free it to avoid memory leak. -SherpaOnnxOnlineStream *CreateOnlineStream( +SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStream( const SherpaOnnxOnlineRecognizer *recognizer); -/// Destory an online stream. +/// Destroy an online stream. /// /// @param stream A pointer returned by CreateOnlineStream() -void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); /// Accept input audio samples and compute the features. /// The user has to invoke DecodeOnlineStream() to run the neural network and @@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); /// @param samples A pointer to a 1-D array containing audio samples. /// The range of samples has to be normalized to [-1, 1]. /// @param n Number of elements in the samples array. -void AcceptWaveform(SherpaOnnxOnlineStream *stream, int32_t sample_rate, - const float *samples, int32_t n); +SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream, + int32_t sample_rate, const float *samples, + int32_t n); /// Return 1 if there are enough number of feature frames for decoding. /// Return 0 otherwise. 
/// /// @param recognizer A pointer returned by CreateOnlineRecognizer /// @param stream A pointer returned by CreateOnlineStream -int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, - SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API int32_t IsOnlineStreamReady( + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); /// Call this function to run the neural network model and decoding. // @@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, /// DecodeOnlineStream(recognizer, stream); /// } /// -void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, - SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, + SherpaOnnxOnlineStream *stream); /// This function is similar to DecodeOnlineStream(). It decodes multiple /// OnlineStream in parallel. @@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, /// @param streams A pointer array containing pointers returned by /// CreateOnlineRecognizer() /// @param n Number of elements in the given streams array. -void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, - SherpaOnnxOnlineStream **streams, int32_t n); +SHERPA_ONNX_API void DecodeMultipleOnlineStreams( + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams, + int32_t n); /// Get the decoding results so far for an OnlineStream. /// @@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, /// @return A pointer containing the result. The user has to invoke /// DestroyOnlineRecognizerResult() to free the returned pointer to /// avoid memory leak. -SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( +SHERPA_ONNX_API SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); /// Destroy the pointer returned by GetOnlineStreamResult(). 
/// /// @param r A pointer returned by GetOnlineStreamResult() -void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r); +SHERPA_ONNX_API void DestroyOnlineRecognizerResult( + const SherpaOnnxOnlineRecognizerResult *r); /// Reset an OnlineStream , which clears the neural network model state /// and the state for decoding. /// /// @param recognizer A pointer returned by CreateOnlineRecognizer(). /// @param stream A pointer returned by CreateOnlineStream -void Reset(SherpaOnnxOnlineRecognizer *recognizer, - SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer, + SherpaOnnxOnlineStream *stream); /// Signal that no more audio samples would be available. /// After this call, you cannot call AcceptWaveform() any more. /// /// @param stream A pointer returned by CreateOnlineStream() -void InputFinished(SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream); /// Return 1 if an endpoint has been detected. /// /// @param recognizer A pointer returned by CreateOnlineRecognizer() /// @param stream A pointer returned by CreateOnlineStream() /// @return Return 1 if an endpoint is detected. Return 0 otherwise. -int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer, - SherpaOnnxOnlineStream *stream); +SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer, + SherpaOnnxOnlineStream *stream); // for displaying results on Linux/macOS. -typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; +SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; /// Create a display object. Must be freed using DestroyDisplay to avoid /// memory leak. -SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line); +SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line); -void DestroyDisplay(SherpaOnnxDisplay *display); +SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display); /// Print the result. 
-void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s); +SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, + const char *s); +// ============================================================ +// For offline ASR (i.e., non-streaming ASR) +// ============================================================ + +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// to download pre-trained models. That is, you can find encoder-xxx.onnx +/// decoder-xxx.onnx, and joiner-xxx.onnx for this struct +/// from there. +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTransducerModelConfig { + const char *encoder; + const char *decoder; + const char *joiner; +} SherpaOnnxOfflineTransducerModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineParaformerModelConfig { + const char *model; +} SherpaOnnxOfflineParaformerModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig { + const char *model; +} SherpaOnnxOfflineNemoEncDecCtcModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig { + const char *model; + float scale; +} SherpaOnnxOfflineLMConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { + SherpaOnnxOfflineTransducerModelConfig transducer; + SherpaOnnxOfflineParaformerModelConfig paraformer; + SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc; + + const char *tokens; + int32_t num_threads; + int32_t debug; +} SherpaOnnxOfflineModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { + SherpaOnnxFeatureConfig feat_config; + SherpaOnnxOfflineModelConfig model_config; + SherpaOnnxOfflineLMConfig lm_config; + + const char *decoding_method; + int32_t max_active_paths; +} SherpaOnnxOfflineRecognizerConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer + SherpaOnnxOfflineRecognizer; + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; + +/// @param 
config Config for the recognizer. +/// @return Return a pointer to the recognizer. The user has to invoke +/// DestroyOfflineRecognizer() to free it to avoid memory leak. +SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( + const SherpaOnnxOfflineRecognizerConfig *config); + +/// Free a pointer returned by CreateOfflineRecognizer() +/// +/// @param recognizer A pointer returned by CreateOfflineRecognizer() +SHERPA_ONNX_API void DestroyOfflineRecognizer( + SherpaOnnxOfflineRecognizer *recognizer); + +/// Create an offline stream for accepting wave samples. +/// +/// @param recognizer A pointer returned by CreateOfflineRecognizer() +/// @return Return a pointer to an OfflineStream. The user has to invoke +/// DestoryOfflineStream() to free it to avoid memory leak. +SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream( + const SherpaOnnxOfflineRecognizer *recognizer); + +/// Destroy an offline stream. +/// +/// @param stream A pointer returned by CreateOfflineStream() +SHERPA_ONNX_API void DestoryOfflineStream(SherpaOnnxOfflineStream *stream); + +/// Accept input audio samples and compute the features. +/// The user has to invoke DecodeOfflineStream() to run the neural network and +/// decoding. +/// +/// @param stream A pointer returned by CreateOfflineStream(). +/// @param sample_rate Sample rate of the input samples. If it is different +/// from config.feat_config.sample_rate, we will do +/// resampling inside sherpa-onnx. +/// @param samples A pointer to a 1-D array containing audio samples. +/// The range of samples has to be normalized to [-1, 1]. +/// @param n Number of elements in the samples array. +/// +/// @caution: For each offline stream, please invoke this function only once! +SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, + int32_t sample_rate, + const float *samples, int32_t n); +/// Decode an offline stream.
+/// +/// We assume you have invoked AcceptWaveformOffline() for the given stream +/// before calling this function. +/// +/// @param recognizer A pointer returned by CreateOfflineRecognizer(). +/// @param stream A pointer returned by CreateOfflineStream() +SHERPA_ONNX_API void DecodeOfflineStream( + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream); + +/// Decode a list of offline streams in parallel. +/// +/// We assume you have invoked AcceptWaveformOffline() for each stream +/// before calling this function. +/// +/// @param recognizer A pointer returned by CreateOfflineRecognizer(). +/// @param streams A pointer array containing pointers returned +/// by CreateOfflineStream(). +/// @param n Number of entries in the given streams. +SHERPA_ONNX_API void DecodeMultipleOfflineStreams( + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams, + int32_t n); + +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { + const char *text; + // TODO(fangjun): Add more fields +} SherpaOnnxOfflineRecognizerResult; + +/// Get the result of the offline stream. +/// +/// We assume you have called DecodeOfflineStream() or +/// DecodeMultipleOfflineStreams() with the given stream before calling +/// this function. +/// +/// @param stream A pointer returned by CreateOfflineStream(). +/// @return Return a pointer to the result. The user has to invoke +/// DestroyOfflineRecognizerResult() to free the returned pointer to +/// avoid memory leak. +SHERPA_ONNX_API SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( + SherpaOnnxOfflineStream *stream); + +/// Destroy the pointer returned by GetOfflineStreamResult().
+/// +/// @param r A pointer returned by GetOfflineStreamResult() +SHERPA_ONNX_API void DestroyOfflineRecognizerResult( + const SherpaOnnxOfflineRecognizerResult *r); #ifdef __cplusplus } /* extern "C" */ diff --git a/sherpa-onnx/csharp-api/SherpaOnnx.cs b/sherpa-onnx/csharp-api/SherpaOnnx.cs deleted file mode 100644 index 4cb25839..00000000 --- a/sherpa-onnx/csharp-api/SherpaOnnx.cs +++ /dev/null @@ -1,872 +0,0 @@ -using System.Runtime.InteropServices; -using System.Diagnostics; - -namespace SherpaOnnx -{ - /// - /// online recognizer package - /// Copyright (c) 2023 by manyeyes - /// - public class OnlineBase : IDisposable - { - public void Dispose() - { - Dispose(disposing: true); - GC.SuppressFinalize(this); - } - protected virtual void Dispose(bool disposing) - { - if (!disposing) - { - if (_onlineRecognizerResult != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); - _onlineRecognizerResult = IntPtr.Zero; - } - if (_onlineStream.impl != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); - _onlineStream.impl = IntPtr.Zero; - } - if (_onlineRecognizer.impl != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); - _onlineRecognizer.impl = IntPtr.Zero; - } - this._disposed = true; - } - } - ~OnlineBase() - { - Dispose(this._disposed); - } - internal SherpaOnnxOnlineStream _onlineStream; - internal IntPtr _onlineRecognizerResult; - internal SherpaOnnxOnlineRecognizer _onlineRecognizer; - internal bool _disposed = false; - } - public class OnlineStream : OnlineBase - { - internal OnlineStream(SherpaOnnxOnlineStream onlineStream) - { - this._onlineStream = onlineStream; - } - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); - _onlineStream.impl = IntPtr.Zero; - this._disposed = true; - base.Dispose(); - } - } - } - public class OnlineRecognizerResult : OnlineBase - { - internal 
OnlineRecognizerResult(IntPtr onlineRecognizerResult) - { - this._onlineRecognizerResult = onlineRecognizerResult; - } - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); - _onlineRecognizerResult = IntPtr.Zero; - this._disposed = true; - base.Dispose(disposing); - } - } - } - public class OnlineRecognizer : OnlineBase - where T : class, new() - { - - public OnlineRecognizer(T t, - string tokensFilePath, string decoding_method = "greedy_search", - int sample_rate = 16000, int feature_dim = 80, - int num_threads = 2, bool debug = false, int max_active_paths = 4, - int enable_endpoint=0,int rule1_min_trailing_silence=0, - int rule2_min_trailing_silence=0,int rule3_min_utterance_length=0) - { - SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer(); - SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig(); - if (t is not null && t.GetType() == typeof(OnlineTransducer)) - { - OnlineTransducer? 
onlineTransducer = t as OnlineTransducer; -#pragma warning disable CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(onlineTransducer.DecoderFilename) - && File.Exists(onlineTransducer.EncoderFilename) - && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model"); -#pragma warning restore CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); - transducer.encoder_filename = onlineTransducer.EncoderFilename; - transducer.decoder_filename = onlineTransducer.DecoderFilename; - transducer.joiner_filename = onlineTransducer.JoinerFilename; - } - - model_config.transducer = transducer; - model_config.num_threads = num_threads; - model_config.debug = debug; - model_config.tokens = tokensFilePath; - - SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); - feat_config.sample_rate = sample_rate; - feat_config.feature_dim = feature_dim; - - SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig; - sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method; - sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config; - sherpaOnnxOnlineRecognizerConfig.model_config = model_config; - sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths; - //endpoint - sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint; - sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence; - sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence; - sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length; - - _onlineRecognizer = - SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig); - } - internal OnlineStream CreateOnlineStream() - { - SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer); - return new OnlineStream(stream); - } - public void InputFinished(OnlineStream 
stream) - { - SherpaOnnxSharp.InputFinished(stream._onlineStream); - } - public List CreateStreams(List samplesList) - { - int batch_size = samplesList.Count; - List streams = new List(); - for (int i = 0; i < batch_size; i++) - { - OnlineStream stream = CreateOnlineStream(); - AcceptWaveform(stream._onlineStream, 16000, samplesList[i]); - InputFinished(stream); - streams.Add(stream); - } - return streams; - } - public OnlineStream CreateStream() - { - OnlineStream stream = CreateOnlineStream(); - return stream; - } - internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples) - { - SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length); - } - public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples) - { - AcceptWaveform(stream._onlineStream, sample_rate, samples); - } - internal IntPtr GetStreamsIntPtr(OnlineStream[] streams) - { - int streams_len = streams.Length; - int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream)); - IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); - unsafe - { - byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); - for (int i = 0; i < streams_len; i++, ptrbds += (size)) - { - IntPtr streamIntptr = new IntPtr(ptrbds); - Marshal.StructureToPtr(streams[i]._onlineStream, streamIntptr, false); - } - - } - return streamsIntPtr; - } - internal bool IsReady(OnlineStream stream) - { - return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0; - } - public void DecodeMultipleStreams(List streams) - { - while (true) - { - List streamList = new List(); - foreach (OnlineStream stream in streams) - { - if (IsReady(stream)) - { - streamList.Add(stream); - } - } - if (streamList.Count == 0) - { - break; - } - OnlineStream[] streamsBatch = new OnlineStream[streamList.Count]; - for (int i = 0; i < streamsBatch.Length; i++) - { - streamsBatch[i] = streamList[i]; - } - streamList.Clear(); - IntPtr streamsIntPtr = 
GetStreamsIntPtr(streamsBatch); - SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, streamsBatch.Length); - Marshal.FreeHGlobal(streamsIntPtr); - } - } - public void DecodeStream(OnlineStream stream) - { - while (IsReady(stream)) - { - SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream); - } - } - internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream) - { - IntPtr result_ip = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream); - OnlineRecognizerResult onlineRecognizerResult = new OnlineRecognizerResult(result_ip); -#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 - SherpaOnnxOnlineRecognizerResult result = - (SherpaOnnxOnlineRecognizerResult)Marshal.PtrToStructure( - onlineRecognizerResult._onlineRecognizerResult, typeof(SherpaOnnxOnlineRecognizerResult)); -#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 - -#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 - string text = Marshal.PtrToStringAnsi(result.text); -#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 - OnlineRecognizerResultEntity onlineRecognizerResultEntity = - new OnlineRecognizerResultEntity(); - onlineRecognizerResultEntity.text = text; - onlineRecognizerResultEntity.text_len = result.text_len; - - return onlineRecognizerResultEntity; - } - public OnlineRecognizerResultEntity GetResult(OnlineStream stream) - { - OnlineRecognizerResultEntity result = GetResult(stream._onlineStream); - return result; - } - public List GetResults(List streams) - { - List results = new List(); - foreach (OnlineStream stream in streams) - { - OnlineRecognizerResultEntity onlineRecognizerResultEntity = GetResult(stream._onlineStream); - results.Add(onlineRecognizerResultEntity); - } - return results; - } - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); - _onlineRecognizer.impl = IntPtr.Zero; - 
this._disposed = true; - base.Dispose(); - } - } - } - public class OfflineBase : IDisposable - { - public void Dispose() - { - Dispose(disposing: true); - GC.SuppressFinalize(this); - } - protected virtual void Dispose(bool disposing) - { - if (!disposing) - { - if (_offlineRecognizerResult != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); - _offlineRecognizerResult = IntPtr.Zero; - } - if (_offlineStream.impl != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); - _offlineStream.impl = IntPtr.Zero; - } - if (_offlineRecognizer.impl != IntPtr.Zero) - { - SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); - _offlineRecognizer.impl = IntPtr.Zero; - } - this._disposed = true; - } - } - ~OfflineBase() - { - Dispose(this._disposed); - } - internal SherpaOnnxOfflineStream _offlineStream; - internal IntPtr _offlineRecognizerResult; - internal SherpaOnnxOfflineRecognizer _offlineRecognizer; - internal bool _disposed = false; - } - public class OfflineStream : OfflineBase - { - internal OfflineStream(SherpaOnnxOfflineStream offlineStream) - { - this._offlineStream = offlineStream; - } - - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); - _offlineStream.impl = IntPtr.Zero; - this._disposed = true; - base.Dispose(); - } - } - } - public class OfflineRecognizerResult : OfflineBase - { - internal OfflineRecognizerResult(IntPtr offlineRecognizerResult) - { - this._offlineRecognizerResult = offlineRecognizerResult; - } - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); - _offlineRecognizerResult = IntPtr.Zero; - this._disposed = true; - base.Dispose(disposing); - } - } - } - public class OfflineRecognizer : OfflineBase - where T : class, new() - { - public OfflineRecognizer(T t, - string tokensFilePath, string 
decoding_method = "greedy_search", - int sample_rate = 16000, int feature_dim = 80, - int num_threads = 2, bool debug = false) - { - SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer(); - SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer(); - SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc(); - SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig(); - if (t is not null && t.GetType() == typeof(OfflineTransducer)) - { - OfflineTransducer? offlineTransducer = t as OfflineTransducer; -#pragma warning disable CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(offlineTransducer.DecoderFilename) - && File.Exists(offlineTransducer.EncoderFilename) - && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model"); -#pragma warning restore CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); - transducer.encoder_filename = offlineTransducer.EncoderFilename; - transducer.decoder_filename = offlineTransducer.DecoderFilename; - transducer.joiner_filename = offlineTransducer.JoinerFilename; - } - else if (t is not null && t.GetType() == typeof(OfflineParaformer)) - { - OfflineParaformer? offlineParaformer = t as OfflineParaformer; -#pragma warning disable CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model"); -#pragma warning restore CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); - paraformer.model = offlineParaformer.Model; - } - else if (t is not null && t.GetType() == typeof(OfflineNemoEncDecCtc)) - { - OfflineNemoEncDecCtc? 
offlineNemoEncDecCtc = t as OfflineNemoEncDecCtc; -#pragma warning disable CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model"); -#pragma warning restore CS8602 // 解引用可能出现空引用。 - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); - nemo_ctc.model = offlineNemoEncDecCtc.Model; - } - - model_config.transducer = transducer; - model_config.paraformer = paraformer; - model_config.nemo_ctc = nemo_ctc; - model_config.num_threads = num_threads; - model_config.debug = debug; - model_config.tokens = tokensFilePath; - - SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); - feat_config.sample_rate = sample_rate; - feat_config.feature_dim = feature_dim; - - SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig; - sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method; - sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config; - sherpaOnnxOfflineRecognizerConfig.model_config = model_config; - - _offlineRecognizer = - SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig); - } - internal OfflineStream CreateOfflineStream() - { - SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer); - return new OfflineStream(stream); - } - public OfflineStream[] CreateOfflineStream(List samplesList) - { - int batch_size = samplesList.Count; - OfflineStream[] streams = new OfflineStream[batch_size]; - List wavFiles = new List(); - for (int i = 0; i < batch_size; i++) - { - OfflineStream stream = CreateOfflineStream(); - AcceptWaveform(stream._offlineStream, 16000, samplesList[i]); - streams[i] = stream; - } - return streams; - } - internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples) - { - SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length); - } - internal IntPtr 
GetStreamsIntPtr(OfflineStream[] streams) - { - int streams_len = streams.Length; - int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream)); - IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); - unsafe - { - byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); - for (int i = 0; i < streams_len; i++, ptrbds += (size)) - { - IntPtr streamIntptr = new IntPtr(ptrbds); - Marshal.StructureToPtr(streams[i]._offlineStream, streamIntptr, false); - } - } - return streamsIntPtr; - } - public void DecodeMultipleOfflineStreams(OfflineStream[] streams) - { - IntPtr streamsIntPtr = GetStreamsIntPtr(streams); - SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length); - Marshal.FreeHGlobal(streamsIntPtr); - } - internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream) - { - IntPtr result_ip = SherpaOnnxSharp.GetOfflineStreamResult(stream); - OfflineRecognizerResult offlineRecognizerResult = new OfflineRecognizerResult(result_ip); -#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 - SherpaOnnxOfflineRecognizerResult result = - (SherpaOnnxOfflineRecognizerResult)Marshal.PtrToStructure( - offlineRecognizerResult._offlineRecognizerResult, typeof(SherpaOnnxOfflineRecognizerResult)); -#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 - -#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 - string text = Marshal.PtrToStringAnsi(result.text); -#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 - OfflineRecognizerResultEntity offlineRecognizerResultEntity = - new OfflineRecognizerResultEntity(); - offlineRecognizerResultEntity.text = text; - offlineRecognizerResultEntity.text_len = result.text_len; - - return offlineRecognizerResultEntity; - } - public List GetResults(OfflineStream[] streams) - { - List results = new List(); - foreach (OfflineStream stream in streams) - { - OfflineRecognizerResultEntity offlineRecognizerResultEntity = GetResult(stream._offlineStream); - 
results.Add(offlineRecognizerResultEntity); - } - return results; - } - protected override void Dispose(bool disposing) - { - if (!disposing) - { - SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); - _offlineRecognizer.impl = IntPtr.Zero; - this._disposed = true; - base.Dispose(); - } - } - } - internal static partial class SherpaOnnxSharp - { - private const string dllName = @"SherpaOnnxSharp"; - - [DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] - internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config); - - [DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] - internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer); - - [DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] - internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size); - - [DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] - internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream); - - [DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] - internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr - streams, int n); - - [DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.Cdecl)] - internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream); - - [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] - 
internal static extern void DestroyOfflineRecognizerResult(IntPtr result); - - [DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream); - - [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer); - - [DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] - internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config); - - /// Free a pointer returned by CreateOnlineRecognizer() - /// - /// @param p A pointer returned by CreateOnlineRecognizer() - [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer); - - /// Create an online stream for accepting wave samples. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @return Return a pointer to an OnlineStream. The user has to invoke - /// DestroyOnlineStream() to free it to avoid memory leak. - [DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)] - internal static extern SherpaOnnxOnlineStream CreateOnlineStream( - SherpaOnnxOnlineRecognizer recognizer); - - /// Destroy an online stream. - /// - /// @param stream A pointer returned by CreateOnlineStream() - [DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream); - - /// Accept input audio samples and compute the features. - /// The user has to invoke DecodeOnlineStream() to run the neural network and - /// decoding. 
- /// - /// @param stream A pointer returned by CreateOnlineStream(). - /// @param sample_rate Sample rate of the input samples. If it is different - /// from config.feat_config.sample_rate, we will do - /// resampling inside sherpa-onnx. - /// @param samples A pointer to a 1-D array containing audio samples. - /// The range of samples has to be normalized to [-1, 1]. - /// @param n Number of elements in the samples array. - [DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)] - internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate, - float[] samples, int n); - - /// Return 1 if there are enough number of feature frames for decoding. - /// Return 0 otherwise. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer - /// @param stream A pointer returned by CreateOnlineStream - [DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)] - internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer, - SherpaOnnxOnlineStream stream); - - /// Call this function to run the neural network model and decoding. - // - /// Precondition for this function: IsOnlineStreamReady() MUST return 1. - /// - /// Usage example: - /// - /// while (IsOnlineStreamReady(recognizer, stream)) { - /// DecodeOnlineStream(recognizer, stream); - /// } - /// - [DllImport(dllName, EntryPoint = "DecodeOnlineStream", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer, - SherpaOnnxOnlineStream stream); - - /// This function is similar to DecodeOnlineStream(). It decodes multiple - /// OnlineStream in parallel. - /// - /// Caution: The caller has to ensure each OnlineStream is ready, i.e., - /// IsOnlineStreamReady() for that stream should return 1. 
- /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @param streams A pointer array containing pointers returned by - /// CreateOnlineRecognizer() - /// @param n Number of elements in the given streams array. - [DllImport(dllName, EntryPoint = "DecodeMultipleOnlineStreams", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer, - IntPtr streams, int n); - - /// Get the decoding results so far for an OnlineStream. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). - /// @param stream A pointer returned by CreateOnlineStream(). - /// @return A pointer containing the result. The user has to invoke - /// DestroyOnlineRecognizerResult() to free the returned pointer to - /// avoid memory leak. - [DllImport(dllName, EntryPoint = "GetOnlineStreamResult", CallingConvention = CallingConvention.Cdecl)] - internal static extern IntPtr GetOnlineStreamResult( - SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream); - - /// Destroy the pointer returned by GetOnlineStreamResult(). - /// - /// @param r A pointer returned by GetOnlineStreamResult() - [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] - internal static extern void DestroyOnlineRecognizerResult(IntPtr result); - - /// Reset an OnlineStream , which clears the neural network model state - /// and the state for decoding. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). - /// @param stream A pointer returned by CreateOnlineStream - [DllImport(dllName, EntryPoint = "Reset", CallingConvention = CallingConvention.Cdecl)] - internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer, - SherpaOnnxOnlineStream stream); - - /// Signal that no more audio samples would be available. - /// After this call, you cannot call AcceptWaveform() any more. 
- /// - /// @param stream A pointer returned by CreateOnlineStream() - [DllImport(dllName, EntryPoint = "InputFinished", CallingConvention = CallingConvention.Cdecl)] - internal static extern void InputFinished(SherpaOnnxOnlineStream stream); - - /// Return 1 if an endpoint has been detected. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @param stream A pointer returned by CreateOnlineStream() - /// @return Return 1 if an endpoint is detected. Return 0 otherwise. - [DllImport(dllName, EntryPoint = "IsEndpoint", CallingConvention = CallingConvention.Cdecl)] - internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer, - SherpaOnnxOnlineStream stream); - } - internal struct SherpaOnnxOfflineTransducer - { - public string encoder_filename; - public string decoder_filename; - public string joiner_filename; - public SherpaOnnxOfflineTransducer() - { - encoder_filename = ""; - decoder_filename = ""; - joiner_filename = ""; - } - }; - internal struct SherpaOnnxOfflineParaformer - { - public string model; - public SherpaOnnxOfflineParaformer() - { - model = ""; - } - }; - internal struct SherpaOnnxOfflineNemoEncDecCtc - { - public string model; - public SherpaOnnxOfflineNemoEncDecCtc() - { - model = ""; - } - }; - internal struct SherpaOnnxOfflineModelConfig - { - public SherpaOnnxOfflineTransducer transducer; - public SherpaOnnxOfflineParaformer paraformer; - public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; - public string tokens; - public int num_threads; - public bool debug; - }; - /// It expects 16 kHz 16-bit single channel wave format. - internal struct SherpaOnnxFeatureConfig - { - /// Sample rate of the input data. MUST match the one expected - /// by the model. For instance, it should be 16000 for models provided - /// by us. - public int sample_rate; - - /// Feature dimension of the model. - /// For instance, it should be 80 for models provided by us. 
- public int feature_dim; - }; - internal struct SherpaOnnxOfflineRecognizerConfig - { - public SherpaOnnxFeatureConfig feat_config; - public SherpaOnnxOfflineModelConfig model_config; - - /// Possible values are: greedy_search, modified_beam_search - public string decoding_method; - - }; - internal struct SherpaOnnxOfflineRecognizer - { - public IntPtr impl; - }; - [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] - internal struct SherpaOnnxOfflineStream - { - public IntPtr impl; - }; - internal struct SherpaOnnxOfflineRecognizerResult - { - public IntPtr text; - public int text_len; - } - internal struct SherpaOnnxOnlineTransducer - { - public string encoder_filename; - public string decoder_filename; - public string joiner_filename; - public SherpaOnnxOnlineTransducer() - { - encoder_filename = string.Empty; - decoder_filename = string.Empty; - joiner_filename = string.Empty; - } - }; - internal struct SherpaOnnxOnlineModelConfig - { - public SherpaOnnxOnlineTransducer transducer; - public string tokens; - public int num_threads; - public bool debug; // true to print debug information of the model - }; - internal struct SherpaOnnxOnlineRecognizerConfig - { - public SherpaOnnxFeatureConfig feat_config; - public SherpaOnnxOnlineModelConfig model_config; - - /// Possible values are: greedy_search, modified_beam_search - public string decoding_method; - - /// Used only when decoding_method is modified_beam_search - /// Example value: 4 - public int max_active_paths; - - /// 0 to disable endpoint detection. - /// A non-zero value to enable endpoint detection. - public int enable_endpoint; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value even if nothing has been decoded. - /// Used only when enable_endpoint is not 0. 
- public float rule1_min_trailing_silence; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value after something that is not blank has been decoded. - /// Used only when enable_endpoint is not 0. - public float rule2_min_trailing_silence; - - /// An endpoint is detected if the utterance in seconds is larger than - /// this value. - /// Used only when enable_endpoint is not 0. - public float rule3_min_utterance_length; - }; - internal struct SherpaOnnxOnlineRecognizerResult - { - public IntPtr text; - public int text_len; - // TODO: Add more fields - } - internal struct SherpaOnnxOnlineRecognizer - { - public IntPtr impl; - }; - [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] - internal struct SherpaOnnxOnlineStream - { - public IntPtr impl; - }; - public class OfflineNemoEncDecCtc - { - private string model = string.Empty; - public string Model { get => model; set => model = value; } - } - public class OfflineParaformer - { - private string model = string.Empty; - public string Model { get => model; set => model = value; } - } - public class OfflineRecognizerResultEntity - { - /// - /// recognizer result - /// - public string? text { get; set; } - /// - /// recognizer result length - /// - public int text_len { get; set; } - /// - /// decode tokens - /// - public List? tokens { get; set; } - /// - /// timestamps - /// - public List? timestamps { get; set; } - } - public class OfflineTransducer - { - private string encoderFilename = string.Empty; - private string decoderFilename = string.Empty; - private string joinerFilename = string.Empty; - public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; } - public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; } - public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; } - } - public class OnlineEndpoint - { - /// 0 to disable endpoint detection. 
- /// A non-zero value to enable endpoint detection. - private int enableEndpoint; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value even if nothing has been decoded. - /// Used only when enable_endpoint is not 0. - private float rule1MinTrailingSilence; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value after something that is not blank has been decoded. - /// Used only when enable_endpoint is not 0. - private float rule2MinTrailingSilence; - - /// An endpoint is detected if the utterance in seconds is larger than - /// this value. - /// Used only when enable_endpoint is not 0. - private float rule3MinUtteranceLength; - - public int EnableEndpoint { get => enableEndpoint; set => enableEndpoint = value; } - public float Rule1MinTrailingSilence { get => rule1MinTrailingSilence; set => rule1MinTrailingSilence = value; } - public float Rule2MinTrailingSilence { get => rule2MinTrailingSilence; set => rule2MinTrailingSilence = value; } - public float Rule3MinUtteranceLength { get => rule3MinUtteranceLength; set => rule3MinUtteranceLength = value; } - } - public class OnlineRecognizerResultEntity - { - /// - /// recognizer result - /// - public string? text { get; set; } - /// - /// recognizer result length - /// - public int text_len { get; set; } - /// - /// decode tokens - /// - public List? tokens { get; set; } - /// - /// timestamps - /// - public List? 
timestamps { get; set; } - } - public class OnlineTransducer - { - private string encoderFilename = string.Empty; - private string decoderFilename = string.Empty; - private string joinerFilename = string.Empty; - public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; } - public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; } - public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; } - } -} \ No newline at end of file diff --git a/sherpa-onnx/csharp-api/SherpaOnnx.csproj b/sherpa-onnx/csharp-api/SherpaOnnx.csproj deleted file mode 100644 index 3f2790a2..00000000 --- a/sherpa-onnx/csharp-api/SherpaOnnx.csproj +++ /dev/null @@ -1,10 +0,0 @@ - - - - net6.0 - enable - enable - true - - - diff --git a/sherpa-onnx/csharp-api/offline-api.cpp b/sherpa-onnx/csharp-api/offline-api.cpp deleted file mode 100644 index 6908a2a8..00000000 --- a/sherpa-onnx/csharp-api/offline-api.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// sherpa-onnx/sharp-api/offline-api.cpp -// -// Copyright (c) 2023 Manyeyes Corporation - -#include "offline-api.h" - -#include "sherpa-onnx/csrc/display.h" -#include "sherpa-onnx/csrc/offline-recognizer.h" - -namespace sherpa_onnx -{ - struct SherpaOnnxOfflineRecognizer { - sherpa_onnx::OfflineRecognizer* impl; - }; - - struct SherpaOnnxOfflineStream { - std::unique_ptr impl; - explicit SherpaOnnxOfflineStream(std::unique_ptr p) - : impl(std::move(p)) {} - }; - - struct SherpaOnnxDisplay { - std::unique_ptr impl; - }; - - SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer( - const SherpaOnnxOfflineRecognizerConfig* config) { - sherpa_onnx::OfflineRecognizerConfig recognizer_config; - - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; - - if (strlen(config->model_config.transducer.encoder_filename) > 0) { - 
recognizer_config.model_config.transducer.encoder_filename = - config->model_config.transducer.encoder_filename; - recognizer_config.model_config.transducer.decoder_filename = - config->model_config.transducer.decoder_filename; - recognizer_config.model_config.transducer.joiner_filename = - config->model_config.transducer.joiner_filename; - } - else if (strlen(config->model_config.paraformer.model) > 0) { - recognizer_config.model_config.paraformer.model = - config->model_config.paraformer.model; - } - else if (strlen(config->model_config.nemo_ctc.model) > 0) { - recognizer_config.model_config.nemo_ctc.model = - config->model_config.nemo_ctc.model; - } - - recognizer_config.model_config.tokens = - config->model_config.tokens; - recognizer_config.model_config.num_threads = - config->model_config.num_threads; - recognizer_config.model_config.debug = - config->model_config.debug; - - recognizer_config.decoding_method = config->decoding_method; - - SherpaOnnxOfflineRecognizer* recognizer = - new SherpaOnnxOfflineRecognizer; - recognizer->impl = - new sherpa_onnx::OfflineRecognizer(recognizer_config); - - return recognizer; - } - - SherpaOnnxOfflineStream* __stdcall CreateOfflineStream( - SherpaOnnxOfflineRecognizer* recognizer) { - SherpaOnnxOfflineStream* stream = - new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); - return stream; - } - - void __stdcall AcceptWaveform( - SherpaOnnxOfflineStream* stream, - int32_t sample_rate, - const float* samples, int32_t samples_size) { - std::vector waveform{ samples, samples + samples_size }; - stream->impl->AcceptWaveform(sample_rate, waveform.data(), waveform.size()); - } - - void __stdcall DecodeOfflineStream( - SherpaOnnxOfflineRecognizer* recognizer, - SherpaOnnxOfflineStream* stream) { - recognizer->impl->DecodeStream(stream->impl.get()); - } - - void __stdcall DecodeMultipleOfflineStreams( - SherpaOnnxOfflineRecognizer* recognizer, - SherpaOnnxOfflineStream** streams, int32_t n) { - std::vector ss(n); - for 
(int32_t i = 0; i != n; ++i) { - ss[i] = streams[i]->impl.get(); - } - recognizer->impl->DecodeStreams(ss.data(), n); - } - - SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult( - SherpaOnnxOfflineStream* stream) { - sherpa_onnx::OfflineRecognitionResult result = - stream->impl->GetResult(); - const auto& text = result.text; - auto r = new SherpaOnnxOfflineRecognizerResult; - r->text = new char[text.size() + 1]; - std::copy(text.begin(), text.end(), const_cast(r->text)); - const_cast(r->text)[text.size()] = 0; - r->text_len = text.size(); - return r; - } - - - /// Free a pointer returned by CreateOfflineRecognizer() - /// - /// @param p A pointer returned by CreateOfflineRecognizer() - void __stdcall DestroyOfflineRecognizer( - SherpaOnnxOfflineRecognizer* recognizer) { - delete recognizer->impl; - delete recognizer; - } - - /// Destory an offline stream. - /// - /// @param stream A pointer returned by CreateOfflineStream() - void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) { - delete stream; - } - - /// Destroy the pointer returned by GetOfflineStreamResult(). - /// - /// @param r A pointer returned by GetOfflineStreamResult() - void __stdcall DestroyOfflineRecognizerResult( - SherpaOnnxOfflineRecognizerResult* r) { - delete r->text; - delete r; - } -}// namespace sherpa_onnx \ No newline at end of file diff --git a/sherpa-onnx/csharp-api/offline-api.h b/sherpa-onnx/csharp-api/offline-api.h deleted file mode 100644 index 03c6011d..00000000 --- a/sherpa-onnx/csharp-api/offline-api.h +++ /dev/null @@ -1,122 +0,0 @@ -// sherpa-onnx/sharp-api/offline-api.h -// -// Copyright (c) 2023 Manyeyes Corporation - -#pragma once - -#include - -namespace sherpa_onnx -{ - /// Please refer to - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html - /// to download pre-trained models. That is, you can find encoder-xxx.onnx - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct - /// from there. 
- typedef struct SherpaOnnxOfflineTransducer { - const char* encoder_filename; - const char* decoder_filename; - const char* joiner_filename; - } SherpaOnnxOfflineTransducer; - - typedef struct SherpaOnnxOfflineParaformer { - const char* model; - }SherpaOnnxOfflineParaformer; - - typedef struct SherpaOnnxOfflineNemoEncDecCtc { - const char* model; - }SherpaOnnxOfflineNemoEncDecCtc; - - - typedef struct SherpaOnnxOfflineModelConfig { - SherpaOnnxOfflineTransducer transducer; - SherpaOnnxOfflineParaformer paraformer; - SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; - const char* tokens; - const int32_t num_threads; - const bool debug; - } SherpaOnnxOfflineModelConfig; - - /// It expects 16 kHz 16-bit single channel wave format. - typedef struct SherpaOnnxFeatureConfig { - /// Sample rate of the input data. MUST match the one expected - /// by the model. For instance, it should be 16000 for models provided - /// by us. - int32_t sample_rate; - - /// Feature dimension of the model. - /// For instance, it should be 80 for models provided by us. - int32_t feature_dim; - } SherpaOnnxFeatureConfig; - - typedef struct SherpaOnnxOfflineRecognizerConfig { - SherpaOnnxFeatureConfig feat_config; - SherpaOnnxOfflineModelConfig model_config; - - /// Possible values are: greedy_search, modified_beam_search - const char* decoding_method; - - } SherpaOnnxOfflineRecognizerConfig; - - typedef struct SherpaOnnxOfflineRecognizerResult { - // Recognition results. - // For English, it consists of space separated words. - // For Chinese, it consists of Chinese words without spaces. - char* text; - int text_len; - - // Decoded results at the token level. - // For instance, for BPE-based models it consists of a list of BPE tokens. - // std::vector tokens; - - // timestamps.size() == tokens.size() - // timestamps[i] records the time in seconds when tokens[i] is decoded. 
- // std::vector timestamps; - } SherpaOnnxOfflineRecognizerResult; - - /// Note: OfflineRecognizer here means StreamingRecognizer. - /// It does not need to access the Internet during recognition. - /// Everything is run locally. - typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer; - - typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; - - extern "C" __declspec(dllexport) - SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer( - const SherpaOnnxOfflineRecognizerConfig * config); - - extern "C" __declspec(dllexport) - SherpaOnnxOfflineStream * __stdcall CreateOfflineStream( - SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer); - - extern "C" __declspec(dllexport) - void __stdcall AcceptWaveform( - SherpaOnnxOfflineStream * stream, int32_t sample_rate, - const float* samples, int32_t samples_size); - - extern "C" __declspec(dllexport) - void __stdcall DecodeOfflineStream( - SherpaOnnxOfflineRecognizer * recognizer, - SherpaOnnxOfflineStream * stream); - - extern "C" __declspec(dllexport) - void __stdcall DecodeMultipleOfflineStreams( - SherpaOnnxOfflineRecognizer * recognizer, - SherpaOnnxOfflineStream * *streams, int32_t n); - - extern "C" __declspec(dllexport) - SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult( - SherpaOnnxOfflineStream * stream); - - extern "C" __declspec(dllexport) - void __stdcall DestroyOfflineRecognizer( - SherpaOnnxOfflineRecognizer * recognizer); - - extern "C" __declspec(dllexport) - void __stdcall DestroyOfflineStream( - SherpaOnnxOfflineStream * stream); - - extern "C" __declspec(dllexport) - void __stdcall DestroyOfflineRecognizerResult( - SherpaOnnxOfflineRecognizerResult * r); -}// namespace sherpa_onnx \ No newline at end of file diff --git a/sherpa-onnx/csharp-api/online-api.cc b/sherpa-onnx/csharp-api/online-api.cc deleted file mode 100644 index b65369a0..00000000 --- a/sherpa-onnx/csharp-api/online-api.cc +++ /dev/null @@ -1,148 +0,0 @@ -// 
sherpa-onnx/cpp-api/c-api.cc -// -// Copyright (c) 2023 Xiaomi Corporation - -#include "online-api.h" - -#include -#include -#include -#include - -#include "../../sherpa-onnx/csrc/display.h" -#include "../../sherpa-onnx/csrc/online-recognizer.h" -namespace sherpa_onnx -{ - struct SherpaOnnxOnlineRecognizer { - sherpa_onnx::OnlineRecognizer* impl; - }; - - struct SherpaOnnxOnlineStream { - std::unique_ptr impl; - explicit SherpaOnnxOnlineStream(std::unique_ptr p) - : impl(std::move(p)) {} - }; - - struct SherpaOnnxDisplay { - std::unique_ptr impl; - }; - - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( - const SherpaOnnxOnlineRecognizerConfig* config) { - sherpa_onnx::OnlineRecognizerConfig recognizer_config; - - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; - - recognizer_config.model_config.encoder_filename = - config->model_config.transducer.encoder; - recognizer_config.model_config.decoder_filename = - config->model_config.transducer.decoder; - recognizer_config.model_config.joiner_filename = config->model_config.transducer.joiner; - recognizer_config.model_config.tokens = config->model_config.tokens; - recognizer_config.model_config.num_threads = config->model_config.num_threads; - recognizer_config.model_config.debug = config->model_config.debug; - - recognizer_config.decoding_method = config->decoding_method; - recognizer_config.max_active_paths = config->max_active_paths; - - recognizer_config.enable_endpoint = config->enable_endpoint; - - recognizer_config.endpoint_config.rule1.min_trailing_silence = - config->rule1_min_trailing_silence; - - recognizer_config.endpoint_config.rule2.min_trailing_silence = - config->rule2_min_trailing_silence; - - recognizer_config.endpoint_config.rule3.min_utterance_length = - config->rule3_min_utterance_length; - - SherpaOnnxOnlineRecognizer* recognizer = new SherpaOnnxOnlineRecognizer; - 
recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); - - return recognizer; - } - - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) { - delete recognizer->impl; - delete recognizer; - } - - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( - const SherpaOnnxOnlineRecognizer* recognizer) { - SherpaOnnxOnlineStream* stream = - new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); - return stream; - } - - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) { delete stream; } - - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, - const float* samples, int32_t n) { - stream->impl->AcceptWaveform(sample_rate, samples, n); - } - - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream) { - return recognizer->impl->IsReady(stream->impl.get()); - } - - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream) { - recognizer->impl->DecodeStream(stream->impl.get()); - } - - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream** streams, int32_t n) { - std::vector ss(n); - for (int32_t i = 0; i != n; ++i) { - ss[i] = streams[i]->impl.get(); - } - recognizer->impl->DecodeStreams(ss.data(), n); - } - - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) { - sherpa_onnx::OnlineRecognizerResult result = - recognizer->impl->GetResult(stream->impl.get()); - const auto& text = result.text; - - auto r = new SherpaOnnxOnlineRecognizerResult; - r->text = new char[text.size() + 1]; - std::copy(text.begin(), text.end(), const_cast(r->text)); - const_cast(r->text)[text.size()] = 0; - r->text_len = text.size(); - return r; - } - - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r) { - delete[] 
r->text; - delete r; - } - - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream) { - recognizer->impl->Reset(stream->impl.get()); - } - - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) { - stream->impl->InputFinished(); - } - - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream) { - return recognizer->impl->IsEndpoint(stream->impl.get()); - } - - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) { - SherpaOnnxDisplay* ans = new SherpaOnnxDisplay; - ans->impl = std::make_unique(max_word_per_line); - return ans; - } - - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) { delete display; } - - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s) { - display->impl->Print(idx, s); - } -} \ No newline at end of file diff --git a/sherpa-onnx/csharp-api/online-api.h b/sherpa-onnx/csharp-api/online-api.h deleted file mode 100644 index bd5150d8..00000000 --- a/sherpa-onnx/csharp-api/online-api.h +++ /dev/null @@ -1,238 +0,0 @@ -// sherpa-onnx/cpp-api/c-api.h -// -// Copyright (c) 2023 Xiaomi Corporation - -// C API for sherpa-onnx -// -// Please refer to -// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c -// for usages. -// - -#ifndef SHERPA_ONNX_CPP_API_C_API_H_ -#define SHERPA_ONNX_CPP_API_C_API_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - namespace sherpa_onnx - { - /// Please refer to - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html - /// to download pre-trained models. That is, you can find encoder-xxx.onnx - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct - /// from there. 
- typedef struct SherpaOnnxOnlineTransducer { - const char* encoder; - const char* decoder; - const char* joiner; - } SherpaOnnxOnlineTransducer; - - typedef struct SherpaOnnxOnlineModelConfig - { - const SherpaOnnxOnlineTransducer transducer; - const char* tokens; - const int32_t num_threads; - const bool debug; // true to print debug information of the model - }SherpaOnnxOnlineModelConfig; - - /// It expects 16 kHz 16-bit single channel wave format. - typedef struct SherpaOnnxFeatureConfig { - /// Sample rate of the input data. MUST match the one expected - /// by the model. For instance, it should be 16000 for models provided - /// by us. - int32_t sample_rate; - - /// Feature dimension of the model. - /// For instance, it should be 80 for models provided by us. - int32_t feature_dim; - } SherpaOnnxFeatureConfig; - - typedef struct SherpaOnnxOnlineRecognizerConfig { - SherpaOnnxFeatureConfig feat_config; - SherpaOnnxOnlineModelConfig model_config; - - /// Possible values are: greedy_search, modified_beam_search - const char* decoding_method; - - /// Used only when decoding_method is modified_beam_search - /// Example value: 4 - int32_t max_active_paths; - - /// 0 to disable endpoint detection. - /// A non-zero value to enable endpoint detection. - int enable_endpoint; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value even if nothing has been decoded. - /// Used only when enable_endpoint is not 0. - float rule1_min_trailing_silence; - - /// An endpoint is detected if trailing silence in seconds is larger than - /// this value after something that is not blank has been decoded. - /// Used only when enable_endpoint is not 0. - float rule2_min_trailing_silence; - - /// An endpoint is detected if the utterance in seconds is larger than - /// this value. - /// Used only when enable_endpoint is not 0. 
- float rule3_min_utterance_length; - } SherpaOnnxOnlineRecognizerConfig; - - typedef struct SherpaOnnxOnlineRecognizerResult { - const char* text; - int text_len; - // TODO(fangjun): Add more fields - } SherpaOnnxOnlineRecognizerResult; - - /// Note: OnlineRecognizer here means StreamingRecognizer. - /// It does not need to access the Internet during recognition. - /// Everything is run locally. - typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer; - typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; - - /// @param config Config for the recongizer. - /// @return Return a pointer to the recognizer. The user has to invoke - // DestroyOnlineRecognizer() to free it to avoid memory leak. - extern "C" __declspec(dllexport) - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( - const SherpaOnnxOnlineRecognizerConfig * config); - - /// Free a pointer returned by CreateOnlineRecognizer() - /// - /// @param p A pointer returned by CreateOnlineRecognizer() - extern "C" __declspec(dllexport) - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer); - - /// Create an online stream for accepting wave samples. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @return Return a pointer to an OnlineStream. The user has to invoke - /// DestroyOnlineStream() to free it to avoid memory leak. - extern "C" __declspec(dllexport) - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( - const SherpaOnnxOnlineRecognizer* recognizer); - - /// Destroy an online stream. - /// - /// @param stream A pointer returned by CreateOnlineStream() - extern "C" __declspec(dllexport) - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream); - - /// Accept input audio samples and compute the features. - /// The user has to invoke DecodeOnlineStream() to run the neural network and - /// decoding. - /// - /// @param stream A pointer returned by CreateOnlineStream(). 
- /// @param sample_rate Sample rate of the input samples. If it is different - /// from config.feat_config.sample_rate, we will do - /// resampling inside sherpa-onnx. - /// @param samples A pointer to a 1-D array containing audio samples. - /// The range of samples has to be normalized to [-1, 1]. - /// @param n Number of elements in the samples array. - extern "C" __declspec(dllexport) - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, - const float* samples, int32_t n); - - /// Return 1 if there are enough number of feature frames for decoding. - /// Return 0 otherwise. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer - /// @param stream A pointer returned by CreateOnlineStream - extern "C" __declspec(dllexport) - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream); - - /// Call this function to run the neural network model and decoding. - // - /// Precondition for this function: IsOnlineStreamReady() MUST return 1. - /// - /// Usage example: - /// - /// while (IsOnlineStreamReady(recognizer, stream)) { - /// DecodeOnlineStream(recognizer, stream); - /// } - /// - extern "C" __declspec(dllexport) - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream); - - /// This function is similar to DecodeOnlineStream(). It decodes multiple - /// OnlineStream in parallel. - /// - /// Caution: The caller has to ensure each OnlineStream is ready, i.e., - /// IsOnlineStreamReady() for that stream should return 1. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @param streams A pointer array containing pointers returned by - /// CreateOnlineRecognizer() - /// @param n Number of elements in the given streams array. 
- extern "C" __declspec(dllexport) - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream** streams, int32_t n); - - /// Get the decoding results so far for an OnlineStream. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). - /// @param stream A pointer returned by CreateOnlineStream(). - /// @return A pointer containing the result. The user has to invoke - /// DestroyOnlineRecognizerResult() to free the returned pointer to - /// avoid memory leak. - extern "C" __declspec(dllexport) - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream); - - /// Destroy the pointer returned by GetOnlineStreamResult(). - /// - /// @param r A pointer returned by GetOnlineStreamResult() - extern "C" __declspec(dllexport) - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r); - - /// Reset an OnlineStream , which clears the neural network model state - /// and the state for decoding. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). - /// @param stream A pointer returned by CreateOnlineStream - extern "C" __declspec(dllexport) - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream); - - /// Signal that no more audio samples would be available. - /// After this call, you cannot call AcceptWaveform() any more. - /// - /// @param stream A pointer returned by CreateOnlineStream() - extern "C" __declspec(dllexport) - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream); - - /// Return 1 if an endpoint has been detected. - /// - /// @param recognizer A pointer returned by CreateOnlineRecognizer() - /// @param stream A pointer returned by CreateOnlineStream() - /// @return Return 1 if an endpoint is detected. Return 0 otherwise. 
- extern "C" __declspec(dllexport) - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, - SherpaOnnxOnlineStream* stream); - - // for displaying results on Linux/macOS. - typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; - - /// Create a display object. Must be freed using DestroyDisplay to avoid - /// memory leak. - extern "C" __declspec(dllexport) - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line); - - extern "C" __declspec(dllexport) - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display); - - /// Print the result. - extern "C" __declspec(dllexport) - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s); - } - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif // SHERPA_ONNX_C_API_C_API_H_