Add C# API for spoken language identification (#697)
This commit is contained in:
30
.github/scripts/test-dot-net.sh
vendored
Executable file
30
.github/scripts/test-dot-net.sh
vendored
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Runs the .NET examples one after another and collects the generated TTS
# wave files into ./tts. All paths are relative to the repository root.

# -e: abort on the first failing command, so a broken example fails the run.
#     Without it the exit status would be that of the final command only.
# -x: echo each command, which makes the CI log easier to follow.
set -ex

cd dotnet-examples/

cd spoken-language-identification
./run.sh

cd ../online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh

# Back to the repository root.
cd ../..

# -p keeps the script rerunnable when ./tts already exists.
mkdir -p tts

cp dotnet-examples/offline-tts/*.wav ./tts
|
||||||
27
.github/workflows/test-dot-net-nuget.yaml
vendored
27
.github/workflows/test-dot-net-nuget.yaml
vendored
@@ -40,33 +40,10 @@ jobs:
|
|||||||
- name: Check dotnet
|
- name: Check dotnet
|
||||||
run: dotnet --info
|
run: dotnet --info
|
||||||
|
|
||||||
- name: Decode a file
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd dotnet-examples/
|
.github/scripts/test-dot-net.sh
|
||||||
|
|
||||||
cd online-decode-files
|
|
||||||
./run-transducer.sh
|
|
||||||
./run-paraformer.sh
|
|
||||||
|
|
||||||
cd ../offline-decode-files
|
|
||||||
./run-nemo-ctc.sh
|
|
||||||
./run-paraformer.sh
|
|
||||||
./run-zipformer.sh
|
|
||||||
./run-hotwords.sh
|
|
||||||
./run-whisper.sh
|
|
||||||
./run-tdnn-yesno.sh
|
|
||||||
|
|
||||||
cd ../offline-tts
|
|
||||||
./run-aishell3.sh
|
|
||||||
./run-piper.sh
|
|
||||||
ls -lh
|
|
||||||
|
|
||||||
cd ../..
|
|
||||||
|
|
||||||
mkdir tts
|
|
||||||
|
|
||||||
cp dotnet-examples/offline-tts/*.wav ./tts
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
31
.github/workflows/test-dot-net.yaml
vendored
31
.github/workflows/test-dot-net.yaml
vendored
@@ -177,39 +177,16 @@ jobs:
|
|||||||
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
|
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
|
||||||
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
|
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
|
||||||
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
|
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
|
||||||
|
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
|
||||||
|
|
||||||
ls -lh /tmp
|
ls -lh /tmp
|
||||||
|
|
||||||
- name: Decode a file
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd dotnet-examples/
|
.github/scripts/test-dot-net.sh
|
||||||
|
|
||||||
cd online-decode-files
|
- uses: actions/upload-artifact@v4
|
||||||
./run-zipformer2-ctc.sh
|
|
||||||
./run-transducer.sh
|
|
||||||
./run-paraformer.sh
|
|
||||||
|
|
||||||
cd ../offline-decode-files
|
|
||||||
./run-nemo-ctc.sh
|
|
||||||
./run-paraformer.sh
|
|
||||||
./run-zipformer.sh
|
|
||||||
./run-hotwords.sh
|
|
||||||
./run-whisper.sh
|
|
||||||
./run-tdnn-yesno.sh
|
|
||||||
|
|
||||||
cd ../offline-tts
|
|
||||||
./run-aishell3.sh
|
|
||||||
./run-piper.sh
|
|
||||||
ls -lh
|
|
||||||
|
|
||||||
cd ../..
|
|
||||||
|
|
||||||
mkdir tts
|
|
||||||
|
|
||||||
cp dotnet-examples/offline-tts/*.wav ./tts
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v3
|
|
||||||
with:
|
with:
|
||||||
name: dot-net-tts-generated-test-files-${{ matrix.os }}
|
name: dot-net-tts-generated-test-files-${{ matrix.os }}
|
||||||
path: tts
|
path: tts
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -42,5 +44,9 @@ Global
|
|||||||
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
|
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
EndGlobal
|
EndGlobal
|
||||||
|
|||||||
42
dotnet-examples/spoken-language-identification/Program.cs
Normal file
42
dotnet-examples/spoken-language-identification/Program.cs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// This file shows how to do spoken language identification with whisper.
|
||||||
|
//
|
||||||
|
// 1. Download a whisper multilingual model. We use a tiny model below.
|
||||||
|
// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
// to download more models.
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
// rm sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
//
|
||||||
|
// 2. Now run it
|
||||||
|
//
|
||||||
|
// dotnet run
|
||||||
|
|
||||||
|
using SherpaOnnx;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System;
|
||||||
|
|
||||||
|
/// <summary>
/// Console demo: identifies the language spoken in a wave file with a
/// multilingual whisper model.
/// </summary>
class SpokenLanguageIdentificationDemo
{
    /// <summary>
    /// Loads the whisper tiny model, feeds one test wave file to it and
    /// prints the detected language.
    /// </summary>
    /// <param name="args">Command line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new SpokenLanguageIdentificationConfig();
        config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
        config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

        // SpokenLanguageIdentification owns a native handle and implements
        // IDisposable, so release it deterministically when Main returns.
        using var slid = new SpokenLanguageIdentification(config);
        var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";

        WaveReader waveReader = new WaveReader(filename);

        var s = slid.CreateStream();
        s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

        var result = slid.Compute(s);
        Console.WriteLine($"Filename: {filename}");
        Console.WriteLine($"Detected language: {result.Lang}");
    }
}
|
||||||
|
|
||||||
1
dotnet-examples/spoken-language-identification/WaveReader.cs
Symbolic link
1
dotnet-examples/spoken-language-identification/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../offline-decode-files/WaveReader.cs
|
||||||
12
dotnet-examples/spoken-language-identification/run.sh
Executable file
12
dotnet-examples/spoken-language-identification/run.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Downloads the whisper tiny model on first use, then runs the example.

# Stop at the first failing command and trace every command.
set -ex

# The extracted model directory doubles as the "already downloaded" marker.
if ! [ -d ./sherpa-onnx-whisper-tiny ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

dotnet run
|
||||||
|
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable for the spoken language identification example. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>spoken_language_identification</RootNamespace>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- Version="*" is a floating version; the concrete version is chosen at restore time. -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable for the spoken language identification example. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>spoken_language_identification</RootNamespace>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
    <!-- Adds /tmp/packages as a restore source next to any configured sources and nuget.org. -->
    <!-- NOTE(review): /tmp/packages is presumably where CI drops the freshly built package; confirm. -->
    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <!-- Version="*" is a floating version; the concrete version is chosen at restore time. -->
  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
|
||||||
@@ -403,8 +403,8 @@ namespace SherpaOnnx
|
|||||||
while (*buffer != 0)
|
while (*buffer != 0)
|
||||||
{
|
{
|
||||||
++buffer;
|
++buffer;
|
||||||
|
length += 1;
|
||||||
}
|
}
|
||||||
length = (int)(buffer - (byte*)impl.Text);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] stringBuffer = new byte[length];
|
byte[] stringBuffer = new byte[length];
|
||||||
@@ -496,8 +496,6 @@ namespace SherpaOnnx
|
|||||||
return new OfflineStream(p);
|
return new OfflineStream(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// You have to ensure that IsReady(stream) returns true before
|
|
||||||
/// you call this method
|
|
||||||
public void Decode(OfflineStream stream)
|
public void Decode(OfflineStream stream)
|
||||||
{
|
{
|
||||||
Decode(_handle.Handle, stream.Handle);
|
Decode(_handle.Handle, stream.Handle);
|
||||||
@@ -549,4 +547,137 @@ namespace SherpaOnnx
|
|||||||
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
|
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Whisper model settings for spoken language identification.
/// The struct is marshalled to native code, so the field order below is
/// part of its layout and must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationWhisperConfig
{
    /// <summary>
    /// Starts with empty model paths and TailPaddings set to -1.
    /// </summary>
    public SpokenLanguageIdentificationWhisperConfig()
    {
        TailPaddings = -1;
        Decoder = string.Empty;
        Encoder = string.Empty;
    }

    /// <summary>Path to the whisper encoder model file.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// <summary>Path to the whisper decoder model file.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    // NOTE(review): -1 presumably asks the native side to pick its own
    // default amount of tail padding; confirm against the C API.
    public int TailPaddings;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Top-level configuration for <see cref="SpokenLanguageIdentification"/>.
/// The struct is passed by reference to the native library, so the field
/// order below is part of its layout and must not be changed.
/// </summary>
// Sequential is already the compiler default for structs; it is spelled out
// here to match SpokenLanguageIdentificationWhisperConfig and to make the
// interop contract explicit.
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationConfig
{
    /// <summary>
    /// Starts with a default whisper config, one thread, debug off and the
    /// "cpu" provider.
    /// </summary>
    public SpokenLanguageIdentificationConfig()
    {
        Whisper = new SpokenLanguageIdentificationWhisperConfig();
        NumThreads = 1;
        Debug = 0;
        Provider = "cpu";
    }

    /// <summary>Whisper model settings.</summary>
    public SpokenLanguageIdentificationWhisperConfig Whisper;

    /// <summary>Number of threads handed to the native library.</summary>
    public int NumThreads;

    // NOTE(review): an int rather than a bool, presumably 0 = off and
    // non-zero = on; confirm against the C API.
    public int Debug;

    /// <summary>Name of the execution provider, "cpu" by default.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Managed copy of a native spoken language identification result.
/// </summary>
public class SpokenLanguageIdentificationResult
{
    /// <summary>
    /// Copies the language string out of the native result that
    /// <paramref name="handle"/> points to. Nothing in this instance refers
    /// to native memory once the constructor returns.
    /// </summary>
    /// <param name="handle">
    /// Pointer to the native result struct, which holds a pointer to a
    /// NUL-terminated UTF-8 string.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="handle"/> is <see cref="IntPtr.Zero"/>.
    /// </exception>
    public SpokenLanguageIdentificationResult(IntPtr handle)
    {
        // Fail the same way on every runtime instead of relying on what
        // PtrToStructure does with a zero pointer.
        if (handle == IntPtr.Zero)
        {
            throw new ArgumentNullException(nameof(handle));
        }

        Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

        // A missing string is reported as an empty language rather than
        // dereferencing a null pointer.
        if (impl.Lang == IntPtr.Zero)
        {
            _lang = "";
            return;
        }

        // Marshal.PtrToStringUTF8(impl.Lang) would do all of the following in
        // one call, but it requires .NET Standard 2.1. Instead, locate the
        // NUL terminator by hand and decode the bytes as UTF-8.
        int length = 0;
        while (Marshal.ReadByte(impl.Lang, length) != 0)
        {
            length += 1;
        }

        byte[] stringBuffer = new byte[length];
        Marshal.Copy(impl.Lang, stringBuffer, 0, length);
        _lang = Encoding.UTF8.GetString(stringBuffer);
    }

    // Mirrors the layout of the native result struct.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Lang;
    }

    private readonly String _lang;

    /// <summary>The detected language as returned by the native library.</summary>
    public String Lang => _lang;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Managed wrapper around the native spoken language identifier.
/// Owns a native handle; call <see cref="Dispose"/> to release it.
/// </summary>
public class SpokenLanguageIdentification : IDisposable
{
    /// <summary>
    /// Creates the native identifier from <paramref name="config"/>.
    /// </summary>
    /// <param name="config">Model paths and runtime settings.</param>
    public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
    {
        IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Creates a stream to feed audio into before calling
    /// <see cref="Compute"/>.
    /// </summary>
    /// <returns>A new stream wrapping the native stream pointer.</returns>
    public OfflineStream CreateStream()
    {
        IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
        return new OfflineStream(p);
    }

    /// <summary>
    /// Runs language identification on the audio held by
    /// <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">A stream obtained from <see cref="CreateStream"/>.</param>
    /// <returns>A managed copy of the native result.</returns>
    public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
    {
        IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
        try
        {
            // The result copies the string, so the native object can be
            // freed as soon as the constructor returns.
            return new SpokenLanguageIdentificationResult(h);
        }
        finally
        {
            // Free the native result even when building the managed copy
            // throws; skip the call when there is nothing to free.
            if (h != IntPtr.Zero)
            {
                SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
            }
        }
    }

    /// <summary>
    /// Releases the native identifier. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpokenLanguageIdentification()
    {
        Cleanup();
    }

    // Destroys the native object once; later calls are no-ops.
    private void Cleanup()
    {
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user