diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
new file mode 100755
index 00000000..c5c6d5a4
--- /dev/null
+++ b/.github/scripts/test-dot-net.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Runs the run scripts of the .NET examples one after another and then copies
+# the wave files found in dotnet-examples/offline-tts into ./tts.
+# All paths are relative, so start it from the repository root.
+
+# Stop at the first failing command and echo each command as it runs. This
+# file is executed as its own bash process, so without this only the exit
+# status of the final cp would decide whether the caller sees a failure.
+set -ex
+
+cd dotnet-examples/
+
+cd spoken-language-identification
+./run.sh
+
+cd ../online-decode-files
+./run-zipformer2-ctc.sh
+./run-transducer.sh
+./run-paraformer.sh
+
+cd ../offline-decode-files
+./run-nemo-ctc.sh
+./run-paraformer.sh
+./run-zipformer.sh
+./run-hotwords.sh
+./run-whisper.sh
+./run-tdnn-yesno.sh
+
+cd ../offline-tts
+./run-aishell3.sh
+./run-piper.sh
+ls -lh
+
+cd ../..
+
+# -p keeps a rerun from failing when ./tts already exists.
+mkdir -p tts
+
+cp dotnet-examples/offline-tts/*.wav ./tts
diff --git a/.github/workflows/test-dot-net-nuget.yaml b/.github/workflows/test-dot-net-nuget.yaml
index 4af976b2..0e7f21b1 100644
--- a/.github/workflows/test-dot-net-nuget.yaml
+++ b/.github/workflows/test-dot-net-nuget.yaml
@@ -40,33 +40,10 @@ jobs:
- name: Check dotnet
run: dotnet --info
- - name: Decode a file
+ - name: Run tests
shell: bash
run: |
- cd dotnet-examples/
-
- cd online-decode-files
- ./run-transducer.sh
- ./run-paraformer.sh
-
- cd ../offline-decode-files
- ./run-nemo-ctc.sh
- ./run-paraformer.sh
- ./run-zipformer.sh
- ./run-hotwords.sh
- ./run-whisper.sh
- ./run-tdnn-yesno.sh
-
- cd ../offline-tts
- ./run-aishell3.sh
- ./run-piper.sh
- ls -lh
-
- cd ../..
-
- mkdir tts
-
- cp dotnet-examples/offline-tts/*.wav ./tts
+ .github/scripts/test-dot-net.sh
- uses: actions/upload-artifact@v4
with:
diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml
index f47c838f..aa8e7b1e 100644
--- a/.github/workflows/test-dot-net.yaml
+++ b/.github/workflows/test-dot-net.yaml
@@ -177,39 +177,16 @@ jobs:
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
+ cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
ls -lh /tmp
- - name: Decode a file
+ - name: Run tests
shell: bash
run: |
- cd dotnet-examples/
+ .github/scripts/test-dot-net.sh
- cd online-decode-files
- ./run-zipformer2-ctc.sh
- ./run-transducer.sh
- ./run-paraformer.sh
-
- cd ../offline-decode-files
- ./run-nemo-ctc.sh
- ./run-paraformer.sh
- ./run-zipformer.sh
- ./run-hotwords.sh
- ./run-whisper.sh
- ./run-tdnn-yesno.sh
-
- cd ../offline-tts
- ./run-aishell3.sh
- ./run-piper.sh
- ls -lh
-
- cd ../..
-
- mkdir tts
-
- cp dotnet-examples/offline-tts/*.wav ./tts
-
- - uses: actions/upload-artifact@v3
+ - uses: actions/upload-artifact@v4
with:
name: dot-net-tts-generated-test-files-${{ matrix.os }}
path: tts
diff --git a/dotnet-examples/sherpa-onnx.sln b/dotnet-examples/sherpa-onnx.sln
index a70405e4..6c469ba3 100644
--- a/dotnet-examples/sherpa-onnx.sln
+++ b/dotnet-examples/sherpa-onnx.sln
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -42,5 +44,9 @@ Global
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
+ {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
diff --git a/dotnet-examples/spoken-language-identification/Program.cs b/dotnet-examples/spoken-language-identification/Program.cs
new file mode 100644
index 00000000..05a785d7
--- /dev/null
+++ b/dotnet-examples/spoken-language-identification/Program.cs
@@ -0,0 +1,42 @@
+// Copyright (c) 2024 Xiaomi Corporation
+//
+// This file shows how to do spoken language identification with whisper.
+//
+// 1. Download a whisper multilingual model. We use a tiny model below.
+// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+// to download more models.
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+// rm sherpa-onnx-whisper-tiny.tar.bz2
+//
+// 2. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+using System.Collections.Generic;
+using System;
+
+class SpokenLanguageIdentificationDemo
+{
+    // Identifies the language spoken in one test wave file and prints it.
+    static void Main(string[] args)
+    {
+        var config = new SpokenLanguageIdentificationConfig();
+        config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
+        config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
+
+        // The identifier is IDisposable; release it when Main returns.
+        using var slid = new SpokenLanguageIdentification(config);
+        var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
+        WaveReader waveReader = new WaveReader(filename);
+
+        var s = slid.CreateStream();
+        s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
+        var result = slid.Compute(s);
+        Console.WriteLine($"Filename: {filename}");
+        Console.WriteLine($"Detected language: {result.Lang}");
+    }
+}
+
diff --git a/dotnet-examples/spoken-language-identification/WaveReader.cs b/dotnet-examples/spoken-language-identification/WaveReader.cs
new file mode 120000
index 00000000..2c5d1679
--- /dev/null
+++ b/dotnet-examples/spoken-language-identification/WaveReader.cs
@@ -0,0 +1 @@
+../offline-decode-files/WaveReader.cs
\ No newline at end of file
diff --git a/dotnet-examples/spoken-language-identification/run.sh b/dotnet-examples/spoken-language-identification/run.sh
new file mode 100755
index 00000000..3b393d5f
--- /dev/null
+++ b/dotnet-examples/spoken-language-identification/run.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+#
+# Downloads the whisper tiny model when it is not present, then runs the example.
+
+set -ex
+
+# The extracted directory doubles as the "already downloaded" marker.
+if ! test -d ./sherpa-onnx-whisper-tiny; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+  rm sherpa-onnx-whisper-tiny.tar.bz2
+fi
+
+dotnet run
+
diff --git a/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
new file mode 100644
index 00000000..eb8b943e
--- /dev/null
+++ b/dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net6.0
+ spoken_language_identification
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/scripts/dotnet/examples/spoken-language-identification.csproj b/scripts/dotnet/examples/spoken-language-identification.csproj
new file mode 100644
index 00000000..ab38ac7e
--- /dev/null
+++ b/scripts/dotnet/examples/spoken-language-identification.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net6.0
+ spoken_language_identification
+ enable
+ enable
+
+
+
+ /tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json
+
+
+
+
+
+
+
diff --git a/scripts/dotnet/offline.cs b/scripts/dotnet/offline.cs
index 4ef2a4a1..1a8612f3 100644
--- a/scripts/dotnet/offline.cs
+++ b/scripts/dotnet/offline.cs
@@ -403,8 +403,8 @@ namespace SherpaOnnx
while (*buffer != 0)
{
++buffer;
+ length += 1;
}
- length = (int)(buffer - (byte*)impl.Text);
}
byte[] stringBuffer = new byte[length];
@@ -496,8 +496,6 @@ namespace SherpaOnnx
return new OfflineStream(p);
}
- /// You have to ensure that IsReady(stream) returns true before
- /// you call this method
public void Decode(OfflineStream stream)
{
Decode(_handle.Handle, stream.Handle);
@@ -549,4 +547,137 @@ namespace SherpaOnnx
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
}
+    /// <summary>
+    /// Whisper model settings for spoken language identification.
+    /// </summary>
+    // NOTE(review): this struct is embedded in SpokenLanguageIdentificationConfig,
+    // which is handed to native code by reference, so the field order and types
+    // presumably mirror a native struct -- confirm before reordering anything.
+    [StructLayout(LayoutKind.Sequential)]
+    public struct SpokenLanguageIdentificationWhisperConfig
+    {
+        /// <summary>
+        /// Starts with empty model paths and TailPaddings set to -1.
+        /// </summary>
+        public SpokenLanguageIdentificationWhisperConfig()
+        {
+            Encoder = "";
+            Decoder = "";
+            TailPaddings = -1;
+        }
+
+        /// <summary>Path to the encoder model file.</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Encoder;
+
+        /// <summary>Path to the decoder model file.</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Decoder;
+
+        // Defaults to -1; presumably that lets the native library pick the
+        // amount of tail padding -- TODO confirm against the native API.
+        public int TailPaddings;
+    }
+
+    /// <summary>
+    /// Top-level configuration passed by reference to the native function that
+    /// creates a spoken-language identifier.
+    /// </summary>
+    // The layout is spelled out explicitly, like the nested whisper config,
+    // because the field order is part of the native contract. Sequential is
+    // also what the C# compiler applies to structs by default, so this does not
+    // change the layout.
+    [StructLayout(LayoutKind.Sequential)]
+    public struct SpokenLanguageIdentificationConfig
+    {
+        /// <summary>
+        /// Starts with a default whisper config, one thread, Debug set to 0 and
+        /// the "cpu" provider.
+        /// </summary>
+        public SpokenLanguageIdentificationConfig()
+        {
+            Whisper = new SpokenLanguageIdentificationWhisperConfig();
+            NumThreads = 1;
+            Debug = 0;
+            Provider = "cpu";
+        }
+
+        /// <summary>Whisper model settings.</summary>
+        public SpokenLanguageIdentificationWhisperConfig Whisper;
+
+        /// <summary>Thread count handed to the native library; defaults to 1.</summary>
+        public int NumThreads;
+
+        // Defaults to 0; presumably a non-zero value turns on debug output in
+        // the native library -- TODO confirm.
+        public int Debug;
+
+        /// <summary>Execution provider name; defaults to "cpu".</summary>
+        [MarshalAs(UnmanagedType.LPStr)]
+        public string Provider;
+    }
+
+    /// <summary>
+    /// Managed copy of the language string carried by a native
+    /// spoken-language-identification result.
+    /// </summary>
+    public class SpokenLanguageIdentificationResult
+    {
+        /// <summary>
+        /// Reads the NUL-terminated language string out of the native result
+        /// struct, decodes it as UTF-8 and stores the managed copy.
+        /// </summary>
+        /// <param name="handle">
+        /// Pointer to the native result struct. It is only read here and is not
+        /// released by this constructor. A zero pointer, or a result whose
+        /// string pointer is zero, produces an empty <see cref="Lang"/>.
+        /// </param>
+        public SpokenLanguageIdentificationResult(IntPtr handle)
+        {
+            _lang = "";
+
+            // Marshal.PtrToStructure() returns null for a zero pointer, and
+            // unboxing that null into Impl would throw.
+            if (handle == IntPtr.Zero)
+            {
+                return;
+            }
+
+            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
+            if (impl.Lang == IntPtr.Zero)
+            {
+                return;
+            }
+
+            // Marshal.PtrToStringUTF8() would do this in one call, but it
+            // requires .net standard 2.1, so find the terminating NUL by hand
+            // and decode the bytes in front of it.
+            int length = 0;
+            while (Marshal.ReadByte(impl.Lang, length) != 0)
+            {
+                length += 1;
+            }
+
+            byte[] stringBuffer = new byte[length];
+            Marshal.Copy(impl.Lang, stringBuffer, 0, length);
+            _lang = Encoding.UTF8.GetString(stringBuffer);
+        }
+
+        // Managed view of the native result struct: a single pointer to the
+        // language string.
+        [StructLayout(LayoutKind.Sequential)]
+        struct Impl
+        {
+            public IntPtr Lang;
+        }
+
+        private readonly String _lang;
+
+        /// <summary>The language string copied from the native result.</summary>
+        public String Lang => _lang;
+    }
+
+    /// <summary>
+    /// Managed wrapper around the native spoken-language identifier. It holds
+    /// a native handle that is released by <see cref="Dispose"/> or, failing
+    /// that, by the finalizer.
+    /// </summary>
+    public class SpokenLanguageIdentification : IDisposable
+    {
+        /// <summary>
+        /// Creates the native identifier from <paramref name="config"/>.
+        /// </summary>
+        public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
+        {
+            IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
+            _handle = new HandleRef(this, h);
+        }
+
+        /// <summary>
+        /// Creates a stream that is later passed to <see cref="Compute"/>.
+        /// </summary>
+        public OfflineStream CreateStream()
+        {
+            IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
+
+            // Only the raw IntPtr reaches native code, so make sure this object
+            // cannot be finalized before the native call has returned.
+            System.GC.KeepAlive(this);
+            return new OfflineStream(p);
+        }
+
+        /// <summary>
+        /// Runs the native computation on <paramref name="stream"/> and returns
+        /// a managed copy of its result.
+        /// </summary>
+        /// <param name="stream">A stream obtained from <see cref="CreateStream"/>.</param>
+        public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
+        {
+            IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
+            try
+            {
+                return new SpokenLanguageIdentificationResult(h);
+            }
+            finally
+            {
+                // Release the native result even when copying it out throws,
+                // and never hand a zero pointer to the native destroy function.
+                if (h != IntPtr.Zero)
+                {
+                    SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
+                }
+
+                // Keep both wrappers away from their finalizers until the
+                // native calls above are done with the raw handles.
+                System.GC.KeepAlive(stream);
+                System.GC.KeepAlive(this);
+            }
+        }
+
+        /// <summary>
+        /// Releases the native identifier. Calling it again is a no-op.
+        /// </summary>
+        public void Dispose()
+        {
+            Cleanup();
+            // Prevent the object from being placed on the
+            // finalization queue
+            System.GC.SuppressFinalize(this);
+        }
+
+        ~SpokenLanguageIdentification()
+        {
+            Cleanup();
+        }
+
+        // Destroys the native identifier once; later calls find a zero handle
+        // and return without touching native code.
+        private void Cleanup()
+        {
+            if (_handle.Handle == IntPtr.Zero)
+            {
+                return;
+            }
+
+            SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);
+
+            // Don't permit the handle to be used again.
+            _handle = new HandleRef(this, IntPtr.Zero);
+        }
+
+        private HandleRef _handle;
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);
+
+        [DllImport(Dll.Filename)]
+        private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);
+
+        [DllImport(Dll.Filename)]
+        private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
+    }
}