Refactor C# code and support building nuget packages for cross-platforms (#144)

This commit is contained in:
Fangjun Kuang
2023-05-10 14:53:04 +08:00
committed by GitHub
parent 0bc571f6ee
commit 7969cf44ac
40 changed files with 2050 additions and 2311 deletions

5
scripts/dotnet/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
all
macos
linux
windows
packages

17
scripts/dotnet/README.md Normal file
View File

@@ -0,0 +1,17 @@
# Introduction
[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source
real-time speech recognition toolkit developed
by the Next-gen Kaldi team.
It supports streaming recognition on a variety of
platforms such as Android, iOS, Raspberry Pi, Linux, Windows, macOS, etc.
It does not require an Internet connection during recognition.
See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
for details.
Please see
https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples
for how to use C# APIs of this package.

118
scripts/dotnet/generate.py Executable file
View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
# Copyright (c) 2023 Xiaomi Corporation
import argparse
import re
from pathlib import Path
import jinja2
SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent
def get_version():
    """Return the sherpa-onnx version string parsed from CMakeLists.txt.

    Raises:
      RuntimeError: if SHERPA_ONNX_VERSION cannot be found.
    """
    cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt"
    with open(cmake_file) as f:
        content = f.read()

    match = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content)
    if match is None:
        raise RuntimeError(f"Cannot find SHERPA_ONNX_VERSION in {cmake_file}")

    # The value is quoted in CMake, e.g. set(SHERPA_ONNX_VERSION "1.4.0")
    return match.group(1).strip('"')


def read_proj_file(filename):
    """Return the content of the given file as a single string."""
    with open(filename) as f:
        return f.read()


def get_dict():
    """Return the substitution dict shared by all csproj templates."""
    return {
        "version": get_version(),
    }


def _write_runtime_csproj(s, *, dirname, rid, lib_names):
    """Render the runtime csproj template for one platform.

    Args:
      s: content of sherpa-onnx.csproj.runtime.in (a jinja2 template).
      dirname: platform sub-directory, e.g. "linux"; the rendered project
        file is written to ./{dirname}/sherpa-onnx.runtime.csproj.
      rid: the .NET runtime identifier, e.g. "linux-x64".
      lib_names: shared library filenames to bundle; they are expected in
        {SHERPA_ONNX_DIR}/{dirname}/sherpa_onnx/lib/.
    """
    prefix = f"{SHERPA_ONNX_DIR}/{dirname}/sherpa_onnx/lib/"
    libs = "\n ;".join(prefix + lib for lib in lib_names)

    d = get_dict()
    d["dotnet_rid"] = rid
    d["libs"] = libs

    environment = jinja2.Environment()
    template = environment.from_string(s)
    with open(f"./{dirname}/sherpa-onnx.runtime.csproj", "w") as f:
        f.write(template.render(**d))


def process_linux(s):
    """Generate ./linux/sherpa-onnx.runtime.csproj from template ``s``."""
    _write_runtime_csproj(
        s,
        dirname="linux",
        rid="linux-x64",
        lib_names=[
            "libkaldi-native-fbank-core.so",
            "libonnxruntime.so.1.14.0",
            "libsherpa-onnx-c-api.so",
            "libsherpa-onnx-core.so",
        ],
    )


def process_macos(s):
    """Generate ./macos/sherpa-onnx.runtime.csproj from template ``s``."""
    _write_runtime_csproj(
        s,
        dirname="macos",
        rid="osx-x64",
        lib_names=[
            "libkaldi-native-fbank-core.dylib",
            "libonnxruntime.1.14.0.dylib",
            "libsherpa-onnx-c-api.dylib",
            "libsherpa-onnx-core.dylib",
        ],
    )


def process_windows(s):
    """Generate ./windows/sherpa-onnx.runtime.csproj from template ``s``."""
    _write_runtime_csproj(
        s,
        dirname="windows",
        rid="win-x64",
        lib_names=[
            "kaldi-native-fbank-core.dll",
            "onnxruntime.dll",
            "sherpa-onnx-c-api.dll",
            "sherpa-onnx-core.dll",
        ],
    )


def main():
    # Per-platform runtime packages, rendered from the same template.
    s = read_proj_file("./sherpa-onnx.csproj.runtime.in")
    process_macos(s)
    process_linux(s)
    process_windows(s)

    # Cross-platform meta package that references the runtime packages
    # produced above (restored from packages_dir).
    s = read_proj_file("./sherpa-onnx.csproj.in")
    d = get_dict()
    d["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages")

    environment = jinja2.Environment()
    template = environment.from_string(s)
    with open("./all/sherpa-onnx.csproj", "w") as f:
        f.write(template.render(**d))


if __name__ == "__main__":
    main()

259
scripts/dotnet/offline.cs Normal file
View File

@@ -0,0 +1,259 @@
/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c) 2023 by manyeyes
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Configuration of an offline (non-streaming) transducer model.
/// Marshaled sequentially to the native C API, so the field order and the
/// marshaling attributes must stay in sync with the corresponding C struct.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineTransducerModelConfig
{
    // All fields default to empty strings so the struct can be passed to
    // the native side without null pointers.
    public OfflineTransducerModelConfig()
    {
        Encoder = "";
        Decoder = "";
        Joiner = "";
    }

    /// Transducer encoder model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// Transducer decoder model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    /// Transducer joiner model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Joiner;
}
/// <summary>
/// Configuration of an offline Paraformer model. Marshaled sequentially to
/// the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineParaformerModelConfig
{
    public OfflineParaformerModelConfig()
    {
        Model = "";
    }

    /// The Paraformer model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;
}
/// <summary>
/// Configuration of an offline NeMo EncDecCTC model. Marshaled sequentially
/// to the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineNemoEncDecCtcModelConfig
{
    public OfflineNemoEncDecCtcModelConfig()
    {
        Model = "";
    }

    /// The NeMo CTC model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;
}
/// <summary>
/// Configuration of an offline language model used for rescoring.
/// Marshaled sequentially to the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineLMConfig
{
    public OfflineLMConfig()
    {
        Model = "";
        Scale = 0.5F;
    }

    /// The language model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;

    /// Scale applied to the LM scores. Default: 0.5.
    public float Scale;
}
/// <summary>
/// Top-level model configuration for offline recognition. Exactly one of
/// Transducer/Paraformer/NeMoCtc is expected to be filled in by the caller.
/// Marshaled sequentially to the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineModelConfig
{
    public OfflineModelConfig()
    {
        Transducer = new OfflineTransducerModelConfig();
        Paraformer = new OfflineParaformerModelConfig();
        NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
        Tokens = "";
        NumThreads = 1;
        Debug = 0;
    }

    public OfflineTransducerModelConfig Transducer;
    public OfflineParaformerModelConfig Paraformer;
    public OfflineNemoEncDecCtcModelConfig NeMoCtc;

    /// The tokens of the model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Tokens;

    /// Number of threads used to run the neural network model.
    public int NumThreads;

    /// Non-zero to print debug information of the model.
    public int Debug;
}
/// <summary>
/// Configuration for constructing an <see cref="OfflineRecognizer"/>.
/// Marshaled sequentially to the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineRecognizerConfig
{
    public OfflineRecognizerConfig()
    {
        FeatConfig = new FeatureConfig();
        ModelConfig = new OfflineModelConfig();
        LmConfig = new OfflineLMConfig();
        DecodingMethod = "greedy_search";
        MaxActivePaths = 4;
    }

    public FeatureConfig FeatConfig;
    public OfflineModelConfig ModelConfig;
    public OfflineLMConfig LmConfig;

    /// Decoding method, e.g. "greedy_search".
    [MarshalAs(UnmanagedType.LPStr)]
    public string DecodingMethod;

    /// Used only when DecodingMethod is modified_beam_search.
    public int MaxActivePaths;
}
/// <summary>
/// Managed copy of a native offline recognition result. The constructor
/// copies everything it needs out of the native struct, so the native
/// result may be destroyed immediately after construction.
/// </summary>
public class OfflineRecognizerResult
{
    /// <param name="handle">Pointer to the native result struct.</param>
    public OfflineRecognizerResult(IntPtr handle)
    {
        // Generic PtrToStructure avoids the object round-trip and cast
        // required by the non-generic overload.
        Impl impl = Marshal.PtrToStructure<Impl>(handle);
        _text = Marshal.PtrToStringUTF8(impl.Text);
    }

    // Layout of the native result; only the text pointer is consumed here.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Text;
    }

    private String _text;

    /// The recognized text, decoded from UTF-8.
    public String Text => _text;
}
/// <summary>
/// Wraps a native offline stream handle created by
/// <see cref="OfflineRecognizer.CreateStream"/>. Dispose() releases the
/// native resource; a finalizer is provided as a safety net.
/// </summary>
public class OfflineStream : IDisposable
{
    public OfflineStream(IntPtr p)
    {
        _handle = new HandleRef(this, p);
    }

    /// Feed audio samples into the stream.
    public void AcceptWaveform(int sampleRate, float[] samples)
    {
        AcceptWaveform(Handle, sampleRate, samples, samples.Length);
    }

    /// Fetch the recognition result. The native result is copied into a
    /// managed object and destroyed before returning, so the returned
    /// object is safe to keep.
    public OfflineRecognizerResult Result
    {
        get
        {
            IntPtr h = GetResult(_handle.Handle);
            OfflineRecognizerResult result = new OfflineRecognizerResult(h);
            DestroyResult(h);
            return result;
        }
    }

    ~OfflineStream()
    {
        Cleanup();
    }

    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    private void Cleanup()
    {
        // Guard against double release (e.g. Dispose() followed by the
        // finalizer): the handle is zeroed after the first destroy, and we
        // must not pass IntPtr.Zero to the native destroy function.
        if (_handle.Handle != IntPtr.Zero)
        {
            DestroyOfflineStream(Handle);
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;
    public IntPtr Handle => _handle.Handle;

    [DllImport(Dll.Filename)]
    private static extern void DestroyOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")]
    private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

    [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")]
    private static extern IntPtr GetResult(IntPtr handle);

    [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
    private static extern void DestroyResult(IntPtr handle);
}
/// <summary>
/// Offline (non-streaming) speech recognizer wrapping the native
/// sherpa-onnx C API. Dispose() releases the native recognizer; a
/// finalizer is provided as a safety net.
/// </summary>
public class OfflineRecognizer : IDisposable
{
    public OfflineRecognizer(OfflineRecognizerConfig config)
    {
        IntPtr h = CreateOfflineRecognizer(ref config);
        _handle = new HandleRef(this, h);
    }

    /// Create a new stream for decoding. The caller owns the returned
    /// stream and should dispose it.
    public OfflineStream CreateStream()
    {
        IntPtr p = CreateOfflineStream(_handle.Handle);
        return new OfflineStream(p);
    }

    /// You have to ensure that IsReady(stream) returns true before
    /// you call this method
    public void Decode(OfflineStream stream)
    {
        Decode(_handle.Handle, stream.Handle);
    }

    // The caller should ensure all passed streams are ready for decoding.
    public void Decode(IEnumerable<OfflineStream> streams)
    {
        IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
        Decode(_handle.Handle, ptrs, ptrs.Length);
    }

    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineRecognizer()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Guard against double release (e.g. Dispose() followed by the
        // finalizer): do not pass IntPtr.Zero to the native destroy.
        if (_handle.Handle != IntPtr.Zero)
        {
            DestroyOfflineRecognizer(_handle.Handle);
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config);

    [DllImport(Dll.Filename)]
    private static extern void DestroyOfflineRecognizer(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr CreateOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")]
    private static extern void Decode(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
    private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
}
}

291
scripts/dotnet/online.cs Normal file
View File

@@ -0,0 +1,291 @@
/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c) 2023 by manyeyes
using System.Linq;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System;
namespace SherpaOnnx
{
/// <summary>
/// Holds the base name of the native sherpa-onnx C API library used by all
/// [DllImport] declarations in this package. The runtime resolves the
/// platform-specific prefix/extension at load time.
/// </summary>
internal static class Dll
{
    public const string Filename = "sherpa-onnx-c-api";
}
/// <summary>
/// Configuration of an online (streaming) transducer model. Marshaled
/// sequentially to the native C API, so the field order and marshaling
/// attributes must stay in sync with the corresponding C struct.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OnlineTransducerModelConfig
{
    public OnlineTransducerModelConfig()
    {
        Encoder = "";
        Decoder = "";
        Joiner = "";
        Tokens = "";
        NumThreads = 1;
        Debug = 0;
    }

    /// Transducer encoder model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// Transducer decoder model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    /// Transducer joiner model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Joiner;

    /// The tokens of the model.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Tokens;

    /// Number of threads used to run the neural network model
    public int NumThreads;

    /// true to print debug information of the model
    public int Debug;
}
/// It expects 16 kHz 16-bit single channel wave format.
/// <summary>
/// Feature extraction configuration shared by the recognizers.
/// Marshaled sequentially to the native C API.
/// </summary>
/// It expects 16 kHz 16-bit single channel wave format.
[StructLayout(LayoutKind.Sequential)]
public struct FeatureConfig
{
    public FeatureConfig()
    {
        SampleRate = 16000;
        FeatureDim = 80;
    }

    /// Sample rate of the input data. MUST match the one expected
    /// by the model. For instance, it should be 16000 for models provided
    /// by us.
    public int SampleRate;

    /// Feature dimension of the model.
    /// For instance, it should be 80 for models provided by us.
    public int FeatureDim;
}
/// <summary>
/// Configuration for constructing an <see cref="OnlineRecognizer"/>.
/// Marshaled sequentially to the native C API.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OnlineRecognizerConfig
{
    public OnlineRecognizerConfig()
    {
        FeatConfig = new FeatureConfig();
        TransducerModelConfig = new OnlineTransducerModelConfig();
        DecodingMethod = "greedy_search";
        MaxActivePaths = 4;
        EnableEndpoint = 0;
        Rule1MinTrailingSilence = 1.2F;
        Rule2MinTrailingSilence = 2.4F;
        Rule3MinUtteranceLength = 20.0F;
    }

    public FeatureConfig FeatConfig;
    public OnlineTransducerModelConfig TransducerModelConfig;

    /// Decoding method, e.g. "greedy_search".
    [MarshalAs(UnmanagedType.LPStr)]
    public string DecodingMethod;

    /// Used only when decoding_method is modified_beam_search
    /// Example value: 4
    public int MaxActivePaths;

    /// 0 to disable endpoint detection.
    /// A non-zero value to enable endpoint detection.
    public int EnableEndpoint;

    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value even if nothing has been decoded.
    /// Used only when enable_endpoint is not 0.
    public float Rule1MinTrailingSilence;

    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value after something that is not blank has been decoded.
    /// Used only when enable_endpoint is not 0.
    public float Rule2MinTrailingSilence;

    /// An endpoint is detected if the utterance in seconds is larger than
    /// this value.
    /// Used only when enable_endpoint is not 0.
    public float Rule3MinUtteranceLength;
}
/// <summary>
/// Managed copy of a native online recognition result. The constructor
/// copies everything it needs out of the native struct, so the native
/// result may be destroyed immediately after construction.
/// </summary>
public class OnlineRecognizerResult
{
    /// <param name="handle">Pointer to the native result struct.</param>
    public OnlineRecognizerResult(IntPtr handle)
    {
        // Generic PtrToStructure avoids the object round-trip and cast
        // required by the non-generic overload.
        Impl impl = Marshal.PtrToStructure<Impl>(handle);
        _text = Marshal.PtrToStringUTF8(impl.Text);
    }

    // Layout of the native result; only the text pointer is consumed here.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Text;
    }

    private String _text;

    /// The recognized text, decoded from UTF-8.
    public String Text => _text;
}
/// <summary>
/// Wraps a native online stream handle created by
/// <see cref="OnlineRecognizer.CreateStream"/>. Dispose() releases the
/// native resource; a finalizer is provided as a safety net.
/// </summary>
public class OnlineStream : IDisposable
{
    public OnlineStream(IntPtr p)
    {
        _handle = new HandleRef(this, p);
    }

    /// Feed audio samples into the stream.
    public void AcceptWaveform(int sampleRate, float[] samples)
    {
        AcceptWaveform(Handle, sampleRate, samples, samples.Length);
    }

    /// Signal that no more audio will be provided for this stream.
    public void InputFinished()
    {
        InputFinished(Handle);
    }

    ~OnlineStream()
    {
        Cleanup();
    }

    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    private void Cleanup()
    {
        // Guard against double release (e.g. Dispose() followed by the
        // finalizer): do not pass IntPtr.Zero to the native destroy.
        if (_handle.Handle != IntPtr.Zero)
        {
            DestroyOnlineStream(Handle);
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;
    public IntPtr Handle => _handle.Handle;

    [DllImport(Dll.Filename)]
    private static extern void DestroyOnlineStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

    [DllImport(Dll.Filename)]
    private static extern void InputFinished(IntPtr handle);
}
// please see
// https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
// https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
/// <summary>
/// Online (streaming) speech recognizer wrapping the native sherpa-onnx
/// C API. Dispose() releases the native recognizer; a finalizer is
/// provided as a safety net.
/// </summary>
public class OnlineRecognizer : IDisposable
{
    public OnlineRecognizer(OnlineRecognizerConfig config)
    {
        IntPtr h = CreateOnlineRecognizer(ref config);
        _handle = new HandleRef(this, h);
    }

    /// Create a new stream for decoding. The caller owns the returned
    /// stream and should dispose it.
    public OnlineStream CreateStream()
    {
        IntPtr p = CreateOnlineStream(_handle.Handle);
        return new OnlineStream(p);
    }

    /// Return true if the passed stream is ready for decoding.
    public bool IsReady(OnlineStream stream)
    {
        return IsReady(_handle.Handle, stream.Handle) != 0;
    }

    /// Return true if an endpoint is detected for this stream.
    /// You probably need to invoke Reset(stream) when this method returns
    /// true.
    public bool IsEndpoint(OnlineStream stream)
    {
        return IsEndpoint(_handle.Handle, stream.Handle) != 0;
    }

    /// You have to ensure that IsReady(stream) returns true before
    /// you call this method
    public void Decode(OnlineStream stream)
    {
        Decode(_handle.Handle, stream.Handle);
    }

    // The caller should ensure all passed streams are ready for decoding.
    public void Decode(IEnumerable<OnlineStream> streams)
    {
        IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
        Decode(_handle.Handle, ptrs, ptrs.Length);
    }

    /// Fetch the current result for the stream. The native result is
    /// copied into a managed object and destroyed before returning.
    public OnlineRecognizerResult GetResult(OnlineStream stream)
    {
        IntPtr h = GetResult(_handle.Handle, stream.Handle);
        OnlineRecognizerResult result = new OnlineRecognizerResult(h);
        DestroyResult(h);
        return result;
    }

    /// When this method returns, IsEndpoint(stream) will return false.
    public void Reset(OnlineStream stream)
    {
        Reset(_handle.Handle, stream.Handle);
    }

    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OnlineRecognizer()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Guard against double release (e.g. Dispose() followed by the
        // finalizer): do not pass IntPtr.Zero to the native destroy.
        if (_handle.Handle != IntPtr.Zero)
        {
            DestroyOnlineRecognizer(_handle.Handle);
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config);

    [DllImport(Dll.Filename)]
    private static extern void DestroyOnlineRecognizer(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr CreateOnlineStream(IntPtr handle);

    [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")]
    private static extern int IsReady(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")]
    private static extern void Decode(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")]
    private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

    [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")]
    private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")]
    private static extern void DestroyResult(IntPtr result);

    [DllImport(Dll.Filename)]
    private static extern void Reset(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern int IsEndpoint(IntPtr handle, IntPtr stream);
}
}

33
scripts/dotnet/run.sh Executable file
View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Copyright (c) 2023 Xiaomi Corporation
#
# Build and pack the per-platform runtime NuGet packages and the
# cross-platform meta package into ./packages.

set -ex

mkdir -p macos linux windows all

# The cross-platform package compiles the C# sources directly.
cp ./online.cs all
cp ./offline.cs all

# Render the csproj files from the jinja2 templates.
./generate.py

# Build and pack each project. "all" must come last: it restores the
# runtime packages produced by the three platform builds from ./packages.
for dir in linux macos windows all; do
  pushd "$dir"
  dotnet build -c Release
  dotnet pack -c Release -o ../packages
  popd
done

ls -lh packages

View File

@@ -0,0 +1,56 @@
<!--
  jinja2 template for the cross-platform "org.k2fsa.sherpa.onnx" NuGet
  package. generate.py substitutes {{ version }} and {{ packages_dir }}
  and writes the result to ./all/sherpa-onnx.csproj.
-->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageReadmeFile>README.md</PackageReadmeFile>
    <OutputType>Library</OutputType>
    <LangVersion>10.0</LangVersion>
    <TargetFrameworks>netstandard2.1;netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
    <RuntimeIdentifiers>linux-x64;osx-x64;win-x64</RuntimeIdentifiers>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <AssemblyName>sherpa-onnx</AssemblyName>
    <Version>{{ version }}</Version>
    <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
    <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
    <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
    <Authors>The Next-gen Kaldi development team</Authors>
    <Owners>The Next-gen Kaldi development team</Owners>
    <Company>Xiaomi Corporation</Company>
    <Copyright>Copyright 2019-2023 Xiaomi Corporation</Copyright>
    <Description>sherpa-onnx is an open-source real-time speech recognition toolkit developed
by the Next-gen Kaldi team. It supports streaming recognition on a variety of
platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc.
It does not require Internet connection during recognition.
See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
for details.
</Description>
    <!-- Pack Option -->
    <Title>sherpa-onnx v{{ version }}</Title>
    <PackageId>org.k2fsa.sherpa.onnx</PackageId>
    <!-- Signing -->
    <SignAssembly>false</SignAssembly>
    <PublicSign>false</PublicSign>
    <DelaySign>false</DelaySign>
  </PropertyGroup>
  <PropertyGroup>
    <!-- Resolve the runtime packages from the local build output first -->
    <RestoreSources>{{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>
  <ItemGroup>
    <None Include="../README.md" Pack="true" PackagePath="/"/>
  </ItemGroup>
  <ItemGroup>
    <!-- Per-platform native runtime packages produced by this repo -->
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-x64" Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-x64" Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x64" Version="{{ version }}" />
  </ItemGroup>
</Project>

View File

@@ -0,0 +1,50 @@
<!--
  jinja2 template for the per-platform runtime NuGet packages.
  generate.py substitutes {{ version }}, {{ dotnet_rid }} and {{ libs }}
  and writes one rendered copy per platform directory.
-->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageReadmeFile>README.md</PackageReadmeFile>
    <OutputType>Library</OutputType>
    <TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0</TargetFrameworks>
    <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
    <AssemblyName>sherpa-onnx</AssemblyName>
    <Version>{{ version }}</Version>
    <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
    <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
    <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
    <!-- Nuget Properties -->
    <Description>.NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project.
In general, you don't need to use this package directly.
Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead
</Description>
    <!-- Only the native libraries are packed; no managed assembly -->
    <IncludeBuildOutput>false</IncludeBuildOutput>
    <!-- Pack Option -->
    <Title>sherpa-onnx {{ dotnet_rid }} v{{ version }}</Title>
    <PackageId>org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }}</PackageId>
    <!-- Signing -->
    <SignAssembly>false</SignAssembly>
    <PublicSign>false</PublicSign>
    <DelaySign>false</DelaySign>
  </PropertyGroup>
  <ItemGroup>
    <None Include="../README.md" Pack="true" PackagePath="/"/>
  </ItemGroup>
  <ItemGroup>
    <!-- Native library must be in native directory... -->
    <!-- If project is built as a STATIC_LIBRARY (e.g. Windows) then we don't have to include it -->
    <Content Include="
{{ libs }}
">
      <PackagePath>runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension)</PackagePath>
      <Pack>true</Pack>
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </Content>
  </ItemGroup>
</Project>