diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index a6cddd9d..70dc4fb7 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,10 @@ cd dotnet-examples/ -cd speaker-identification +cd offline-punctuation +./run.sh + +cd ../speaker-identification ./run.sh cd ../streaming-hlg-decoding/ diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index d0214d43..500d9e02 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -196,6 +196,7 @@ jobs: cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification + cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation ls -lh /tmp
diff --git a/dotnet-examples/offline-punctuation/Program.cs b/dotnet-examples/offline-punctuation/Program.cs
new file mode 100644
index 00000000..83a54fea
--- /dev/null
+++ b/dotnet-examples/offline-punctuation/Program.cs
@@ -0,0 +1,54 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+// This file shows how to add punctuation to text.
+//
+// 1. Download a model from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+//
+// 2. Extract it
+//
+// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+//
+// 3. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+using System.Collections.Generic;
+using System;
+
+class OfflinePunctuationDemo
+{
+    // Loads the model extracted above, adds punctuation to a few sample
+    // sentences and prints each input next to its punctuated output.
+    static void Main(string[] args)
+    {
+        var config = new OfflinePunctuationConfig();
+        config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
+        config.Model.Debug = 1;
+        config.Model.NumThreads = 1;
+
+        // OfflinePunctuation is IDisposable; dispose it when done instead of
+        // leaving the cleanup to the finalizer.
+        using (var punct = new OfflinePunctuation(config))
+        {
+            string[] textList = new string[] {
+                "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
+                "我们都是木头人不会说话不会动",
+                "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
+            };
+
+            Console.WriteLine("---------");
+            foreach (string text in textList)
+            {
+                string textWithPunct = punct.AddPunct(text);
+                Console.WriteLine("Input text: {0}", text);
+                Console.WriteLine("Output text: {0}", textWithPunct);
+                Console.WriteLine("---------");
+            }
+        }
+    }
+}
+
diff --git a/dotnet-examples/offline-punctuation/offline-punctuation.csproj b/dotnet-examples/offline-punctuation/offline-punctuation.csproj new file mode 100644 index 00000000..4df05647 --- /dev/null +++ b/dotnet-examples/offline-punctuation/offline-punctuation.csproj @@ -0,0 +1,15 @@ + + + + Exe + net6.0 + offline_punctuation + enable + enable + + + + + + +
diff --git a/dotnet-examples/offline-punctuation/run.sh b/dotnet-examples/offline-punctuation/run.sh
new file mode 100755
index 00000000..f5920bc6
--- /dev/null
+++ b/dotnet-examples/offline-punctuation/run.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# Download and extract the model only when model.onnx is not already present.
+if [ ! -e ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
+fi
+
+dotnet run
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln index 7d295e15..fae0af92 100644 --- a/dotnet-examples/sherpa-onnx.sln +++ b/dotnet-examples/sherpa-onnx.sln @@ -19,6 +19,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "s EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -60,5 +62,9 @@ Global {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU {2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU + {42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal diff --git a/scripts/dotnet/FeatureConfig.cs b/scripts/dotnet/FeatureConfig.cs index 3ad48be4..f7bfb97c 100644 --- a/scripts/dotnet/FeatureConfig.cs +++ b/scripts/dotnet/FeatureConfig.cs @@ -29,4 +29,4 
@@ namespace SherpaOnnx public int FeatureDim; } -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineLMConfig.cs b/scripts/dotnet/OfflineLMConfig.cs index b561821a..51683a71 100644 --- a/scripts/dotnet/OfflineLMConfig.cs +++ b/scripts/dotnet/OfflineLMConfig.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OfflineLMConfig { @@ -22,5 +21,4 @@ namespace SherpaOnnx public float Scale; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineModelConfig.cs b/scripts/dotnet/OfflineModelConfig.cs index 9ed5eb53..58b24dbb 100644 --- a/scripts/dotnet/OfflineModelConfig.cs +++ b/scripts/dotnet/OfflineModelConfig.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OfflineModelConfig { @@ -44,6 +43,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string ModelType; } - - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs b/scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs index 00ac91a9..8dbf19d9 100644 --- a/scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs +++ b/scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OfflineNemoEncDecCtcModelConfig { @@ -19,4 +18,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Model; } -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineParaformerModelConfig.cs b/scripts/dotnet/OfflineParaformerModelConfig.cs index 0fe99ad4..6acf194e 100644 --- a/scripts/dotnet/OfflineParaformerModelConfig.cs +++ b/scripts/dotnet/OfflineParaformerModelConfig.cs @@ -18,5 +18,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Model; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflinePunctuation.cs b/scripts/dotnet/OfflinePunctuation.cs new file mode 100644 index 
00000000..9d3c5400
--- /dev/null
+++ b/scripts/dotnet/OfflinePunctuation.cs
@@ -0,0 +1,136 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Managed wrapper around the native offline punctuation object created
+    /// by SherpaOnnxCreateOfflinePunctuation. Dispose it to destroy the
+    /// native object deterministically.
+    /// </summary>
+    public class OfflinePunctuation : IDisposable
+    {
+        /// <summary>Creates the native object from <paramref name="config"/>.</summary>
+        /// <param name="config">Settings passed by reference to the native library.</param>
+        public OfflinePunctuation(OfflinePunctuationConfig config)
+        {
+            IntPtr h = SherpaOnnxCreateOfflinePunctuation(ref config);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// <summary>Adds punctuation to <paramref name="text"/>.</summary>
+        /// <param name="text">The input text; must not be null.</param>
+        /// <returns>
+        /// The native result decoded as UTF-8, or an empty string when the
+        /// native call returns a null pointer or an empty string.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">
+        /// The native handle is zero, e.g. because the object was disposed.
+        /// </exception>
+        public String AddPunct(String text)
+        {
+            if (text == null)
+            {
+                throw new ArgumentNullException(nameof(text));
+            }
+
+            if (_handle.Handle == IntPtr.Zero)
+            {
+                throw new InvalidOperationException("OfflinePunctuation has no valid native handle.");
+            }
+
+            // Pass NUL-terminated UTF-8 bytes explicitly. UnmanagedType.LPStr
+            // marshals through the ANSI code page on Windows, which corrupts
+            // non-ASCII input such as Chinese text.
+            byte[] utf8 = Encoding.UTF8.GetBytes(text);
+            byte[] utf8WithNul = new byte[utf8.Length + 1]; // last byte stays 0
+            Array.Copy(utf8, utf8WithNul, utf8.Length);
+
+            IntPtr p = SherpaOfflinePunctuationAddPunct(_handle.Handle, utf8WithNul);
+
+            // Only the raw IntPtr is handed to native code, so keep this object
+            // reachable until the call has returned; otherwise the finalizer
+            // could destroy the native object while it is still in use.
+            GC.KeepAlive(this);
+
+            if (p == IntPtr.Zero)
+            {
+                return "";
+            }
+
+            try
+            {
+                // The result is NUL-terminated; count the bytes before the NUL.
+                int length = 0;
+                while (Marshal.ReadByte(p, length) != 0)
+                {
+                    ++length;
+                }
+
+                if (length == 0)
+                {
+                    return "";
+                }
+
+                byte[] stringBuffer = new byte[length];
+                Marshal.Copy(p, stringBuffer, 0, length);
+                return Encoding.UTF8.GetString(stringBuffer);
+            }
+            finally
+            {
+                // Always return the buffer to the native library.
+                SherpaOfflinePunctuationFreeText(p);
+            }
+        }
+
+        /// <summary>Destroys the native object. Calling it again is a no-op.</summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~OfflinePunctuation()
+        {
+            Cleanup();
+        }
+
+        private void Cleanup()
+        {
+            if (_handle.Handle == IntPtr.Zero)
+            {
+                // Nothing to destroy (for example, Cleanup already ran).
+                return;
+            }
+
+            SherpaOnnxDestroyOfflinePunctuation(_handle.Handle);
+
+            // Don't permit the handle to be used again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+        }
+
+        private HandleRef _handle;
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCreateOfflinePunctuation(ref OfflinePunctuationConfig config);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroyOfflinePunctuation(IntPtr handle);
+
+        // text is a NUL-terminated UTF-8 byte string.
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOfflinePunctuationAddPunct(IntPtr handle, byte[] text);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOfflinePunctuationFreeText(IntPtr p);
+    }
+}
+
diff --git a/scripts/dotnet/OfflinePunctuationConfig.cs b/scripts/dotnet/OfflinePunctuationConfig.cs
new file mode 100644
index 00000000..6c503a89
--- /dev/null
+++ b/scripts/dotnet/OfflinePunctuationConfig.cs
@@ -0,0 +1,28 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>
+    /// Top-level configuration passed by reference to the native library
+    /// when an OfflinePunctuation is constructed.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct OfflinePunctuationConfig
+    {
+        /// <summary>Initializes <see cref="Model"/> with its default values.</summary>
+        public OfflinePunctuationConfig()
+        {
+            Model = new OfflinePunctuationModelConfig();
+        }
+
+        /// <summary>Model-related settings.</summary>
+        public OfflinePunctuationModelConfig Model;
+    }
+}
+
diff --git a/scripts/dotnet/OfflinePunctuationModelConfig.cs b/scripts/dotnet/OfflinePunctuationModelConfig.cs
new file mode 100644
index 00000000..f7600836
--- /dev/null
+++ b/scripts/dotnet/OfflinePunctuationModelConfig.cs
@@ -0,0 +1,41 @@
+/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+using System.Linq;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using System;
+
+namespace SherpaOnnx
+{
+    /// <summary>Model settings nested inside OfflinePunctuationConfig.</summary>
+    [StructLayout(LayoutKind.Sequential)]
+    public struct OfflinePunctuationModelConfig
+    {
+        /// <summary>
+        /// Sets the defaults: empty model path, NumThreads 1, Debug 0
+        /// and the "cpu" provider.
+        /// </summary>
+        public OfflinePunctuationModelConfig()
+        {
+            CtTransformer = "";
+            NumThreads = 1;
+            Debug = 0;
+            Provider = "cpu";
+        }
+
+        /// <summary>Path to the CT-Transformer punctuation model file (model.onnx).</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string CtTransformer;
+
+        /// <summary>Thread count handed to the native library; defaults to 1.</summary>
+        public int NumThreads;
+
+        /// <summary>Debug flag handed to the native library; defaults to 0.</summary>
+        public int Debug;
+
+        /// <summary>Provider name handed to the native library; defaults to "cpu".</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Provider;
+    }
+}
diff --git a/scripts/dotnet/OfflineRecognizer.cs b/scripts/dotnet/OfflineRecognizer.cs index 2114bee4..58268048 100644 --- a/scripts/dotnet/OfflineRecognizer.cs +++ b/scripts/dotnet/OfflineRecognizer.cs @@ -72,5 +72,4 @@ namespace SherpaOnnx [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")] private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineRecognizerConfig.cs b/scripts/dotnet/OfflineRecognizerConfig.cs index 2594b692..23d6e18a 100644 --- a/scripts/dotnet/OfflineRecognizerConfig.cs +++ b/scripts/dotnet/OfflineRecognizerConfig.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OfflineRecognizerConfig { @@ -38,6 +37,4 @@ namespace SherpaOnnx public float HotwordsScore; } - - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineRecognizerResult.cs b/scripts/dotnet/OfflineRecognizerResult.cs index f5925a39..433d70bc 100644 --- a/scripts/dotnet/OfflineRecognizerResult.cs +++ b/scripts/dotnet/OfflineRecognizerResult.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - public class OfflineRecognizerResult { public OfflineRecognizerResult(IntPtr handle) @@ -44,6 +43,4 @@ namespace SherpaOnnx private String _text; public String Text => _text; } - - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineStream.cs b/scripts/dotnet/OfflineStream.cs index d9ac2467..0a7cafb8 100644 --- a/scripts/dotnet/OfflineStream.cs +++ b/scripts/dotnet/OfflineStream.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - public class OfflineStream : IDisposable { public OfflineStream(IntPtr p) @@ -68,5 +67,4 @@ namespace SherpaOnnx [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")] private static extern void 
DestroyResult(IntPtr handle); } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineTdnnModelConfig.cs b/scripts/dotnet/OfflineTdnnModelConfig.cs index 6b8d72e0..5a27c0b4 100644 --- a/scripts/dotnet/OfflineTdnnModelConfig.cs +++ b/scripts/dotnet/OfflineTdnnModelConfig.cs @@ -18,5 +18,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Model; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineTransducerModelConfig.cs b/scripts/dotnet/OfflineTransducerModelConfig.cs index f3c5f2f0..7c10745f 100644 --- a/scripts/dotnet/OfflineTransducerModelConfig.cs +++ b/scripts/dotnet/OfflineTransducerModelConfig.cs @@ -26,5 +26,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Joiner; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineTtsConfig.cs b/scripts/dotnet/OfflineTtsConfig.cs index 0eb1d1ba..c0f49c53 100644 --- a/scripts/dotnet/OfflineTtsConfig.cs +++ b/scripts/dotnet/OfflineTtsConfig.cs @@ -28,5 +28,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string RuleFars; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineTtsModelConfig.cs b/scripts/dotnet/OfflineTtsModelConfig.cs index ce8739ec..50ccb667 100644 --- a/scripts/dotnet/OfflineTtsModelConfig.cs +++ b/scripts/dotnet/OfflineTtsModelConfig.cs @@ -8,7 +8,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OfflineTtsModelConfig { @@ -26,4 +25,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Provider; } -} \ No newline at end of file +} diff --git a/scripts/dotnet/OfflineWhisperModelConfig.cs b/scripts/dotnet/OfflineWhisperModelConfig.cs index f66d2d32..91af9979 100644 --- a/scripts/dotnet/OfflineWhisperModelConfig.cs +++ b/scripts/dotnet/OfflineWhisperModelConfig.cs @@ -33,5 +33,4 @@ namespace SherpaOnnx public int TailPaddings; } - -} \ No newline at end of file +} diff --git 
a/scripts/dotnet/OnlineCtcFstDecoderConfig.cs b/scripts/dotnet/OnlineCtcFstDecoderConfig.cs index e7ab8263..9669a8cf 100644 --- a/scripts/dotnet/OnlineCtcFstDecoderConfig.cs +++ b/scripts/dotnet/OnlineCtcFstDecoderConfig.cs @@ -24,5 +24,4 @@ namespace SherpaOnnx public int MaxActive; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineModelConfig.cs b/scripts/dotnet/OnlineModelConfig.cs index b9c07467..1471959d 100644 --- a/scripts/dotnet/OnlineModelConfig.cs +++ b/scripts/dotnet/OnlineModelConfig.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OnlineModelConfig { @@ -45,5 +44,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string ModelType; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineParaformerModelConfig.cs b/scripts/dotnet/OnlineParaformerModelConfig.cs index 5a24265f..0afc6d90 100644 --- a/scripts/dotnet/OnlineParaformerModelConfig.cs +++ b/scripts/dotnet/OnlineParaformerModelConfig.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OnlineParaformerModelConfig { @@ -26,5 +25,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Decoder; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineRecognizerConfig.cs b/scripts/dotnet/OnlineRecognizerConfig.cs index 78afce4e..6ba6f5b6 100644 --- a/scripts/dotnet/OnlineRecognizerConfig.cs +++ b/scripts/dotnet/OnlineRecognizerConfig.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OnlineRecognizerConfig { @@ -66,5 +65,4 @@ namespace SherpaOnnx public OnlineCtcFstDecoderConfig CtcFstDecoderConfig; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineRecognizerResult.cs b/scripts/dotnet/OnlineRecognizerResult.cs index 0cab9db4..62f9c047 100644 --- a/scripts/dotnet/OnlineRecognizerResult.cs +++ 
b/scripts/dotnet/OnlineRecognizerResult.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - public class OnlineRecognizerResult { public OnlineRecognizerResult(IntPtr handle) @@ -103,4 +102,4 @@ namespace SherpaOnnx private float[] _timestamps; public float[] Timestamps => _timestamps; } -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineStream.cs b/scripts/dotnet/OnlineStream.cs index fd5c07f0..fde57401 100644 --- a/scripts/dotnet/OnlineStream.cs +++ b/scripts/dotnet/OnlineStream.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - public class OnlineStream : IDisposable { public OnlineStream(IntPtr p) @@ -61,5 +60,4 @@ namespace SherpaOnnx [DllImport(Dll.Filename)] private static extern void InputFinished(IntPtr handle); } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/OnlineTransducerModelConfig.cs b/scripts/dotnet/OnlineTransducerModelConfig.cs index 0a24dd15..3c9d73b8 100644 --- a/scripts/dotnet/OnlineTransducerModelConfig.cs +++ b/scripts/dotnet/OnlineTransducerModelConfig.cs @@ -10,7 +10,6 @@ using System; namespace SherpaOnnx { - [StructLayout(LayoutKind.Sequential)] public struct OnlineTransducerModelConfig { @@ -30,5 +29,4 @@ namespace SherpaOnnx [MarshalAs(UnmanagedType.LPStr)] public string Joiner; } - -} \ No newline at end of file +} diff --git a/scripts/dotnet/examples/offline-punctuation.csproj b/scripts/dotnet/examples/offline-punctuation.csproj new file mode 100644 index 00000000..83bd7958 --- /dev/null +++ b/scripts/dotnet/examples/offline-punctuation.csproj @@ -0,0 +1,19 @@ + + + + Exe + net6.0 + offline_punctuation + enable + enable + + + + /tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json + + + + + + +