Add C# API for spoken language identification (#697)
This commit is contained in:
30
.github/scripts/test-dot-net.sh
vendored
Executable file
30
.github/scripts/test-dot-net.sh
vendored
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
#
# Runs the .NET example scripts one directory at a time and then collects
# the wave files generated by the offline-tts examples into ./tts.
#
# The script uses relative paths, so it must be started from the directory
# that contains dotnet-examples/.

# -e: abort on the first failing example. Without it the exit status of this
#     script is only that of its last command, so earlier failures are hidden.
# -x: echo every command to make the CI log easy to follow.
set -ex

cd dotnet-examples/

cd spoken-language-identification
./run.sh

cd ../online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh

# Back to the directory the script was started from.
cd ../..

# -p: do not fail if ./tts is left over from a previous run.
mkdir -p tts

cp dotnet-examples/offline-tts/*.wav ./tts
|
||||
27
.github/workflows/test-dot-net-nuget.yaml
vendored
27
.github/workflows/test-dot-net-nuget.yaml
vendored
@@ -40,33 +40,10 @@ jobs:
|
||||
- name: Check dotnet
|
||||
run: dotnet --info
|
||||
|
||||
- name: Decode a file
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd dotnet-examples/
|
||||
|
||||
cd online-decode-files
|
||||
./run-transducer.sh
|
||||
./run-paraformer.sh
|
||||
|
||||
cd ../offline-decode-files
|
||||
./run-nemo-ctc.sh
|
||||
./run-paraformer.sh
|
||||
./run-zipformer.sh
|
||||
./run-hotwords.sh
|
||||
./run-whisper.sh
|
||||
./run-tdnn-yesno.sh
|
||||
|
||||
cd ../offline-tts
|
||||
./run-aishell3.sh
|
||||
./run-piper.sh
|
||||
ls -lh
|
||||
|
||||
cd ../..
|
||||
|
||||
mkdir tts
|
||||
|
||||
cp dotnet-examples/offline-tts/*.wav ./tts
|
||||
.github/scripts/test-dot-net.sh
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
31
.github/workflows/test-dot-net.yaml
vendored
31
.github/workflows/test-dot-net.yaml
vendored
@@ -177,39 +177,16 @@ jobs:
|
||||
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
|
||||
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
|
||||
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
|
||||
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
|
||||
|
||||
ls -lh /tmp
|
||||
|
||||
- name: Decode a file
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd dotnet-examples/
|
||||
.github/scripts/test-dot-net.sh
|
||||
|
||||
cd online-decode-files
|
||||
./run-zipformer2-ctc.sh
|
||||
./run-transducer.sh
|
||||
./run-paraformer.sh
|
||||
|
||||
cd ../offline-decode-files
|
||||
./run-nemo-ctc.sh
|
||||
./run-paraformer.sh
|
||||
./run-zipformer.sh
|
||||
./run-hotwords.sh
|
||||
./run-whisper.sh
|
||||
./run-tdnn-yesno.sh
|
||||
|
||||
cd ../offline-tts
|
||||
./run-aishell3.sh
|
||||
./run-piper.sh
|
||||
ls -lh
|
||||
|
||||
cd ../..
|
||||
|
||||
mkdir tts
|
||||
|
||||
cp dotnet-examples/offline-tts/*.wav ./tts
|
||||
|
||||
- uses: actions/upload-artifact@v3
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dot-net-tts-generated-test-files-${{ matrix.os }}
|
||||
path: tts
|
||||
|
||||
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -42,5 +44,9 @@ Global
|
||||
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
||||
42
dotnet-examples/spoken-language-identification/Program.cs
Normal file
42
dotnet-examples/spoken-language-identification/Program.cs
Normal file
@@ -0,0 +1,42 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
//
|
||||
// This file shows how to do spoken language identification with whisper.
|
||||
//
|
||||
// 1. Download a whisper multilingual model. We use a tiny model below.
|
||||
// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
// to download more models.
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
||||
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
|
||||
// rm sherpa-onnx-whisper-tiny.tar.bz2
|
||||
//
|
||||
// 2. Now run it
|
||||
//
|
||||
// dotnet run
|
||||
|
||||
using SherpaOnnx;
|
||||
using System.Collections.Generic;
|
||||
using System;
|
||||
|
||||
/// <summary>
/// Console demo: runs spoken language identification on a single wave file
/// with a whisper multilingual model and prints the detected language.
/// </summary>
class SpokenLanguageIdentificationDemo
{

  /// <summary>
  /// Entry point. The model and the test wave file are expected under
  /// ./sherpa-onnx-whisper-tiny (see the download steps at the top of this file).
  /// </summary>
  /// <param name="args">Command line arguments; not used.</param>
  static void Main(string[] args)
  {
    var config = new SpokenLanguageIdentificationConfig();
    config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
    config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

    // SpokenLanguageIdentification implements IDisposable; dispose it
    // deterministically instead of leaving the release to the finalizer.
    using var slid = new SpokenLanguageIdentification(config);
    var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";

    WaveReader waveReader = new WaveReader(filename);

    // Feed the whole file into one stream, then ask for the language.
    var s = slid.CreateStream();
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
    var result = slid.Compute(s);
    Console.WriteLine($"Filename: {filename}");
    Console.WriteLine($"Detected language: {result.Lang}");
  }
}
|
||||
|
||||
1
dotnet-examples/spoken-language-identification/WaveReader.cs
Symbolic link
1
dotnet-examples/spoken-language-identification/WaveReader.cs
Symbolic link
@@ -0,0 +1 @@
|
||||
../offline-decode-files/WaveReader.cs
|
||||
12
dotnet-examples/spoken-language-identification/run.sh
Executable file
12
dotnet-examples/spoken-language-identification/run.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
#
# Downloads the whisper tiny model if it is not already present in the
# current directory, then builds and runs the example with `dotnet run`.

set -ex

if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  # -f: fail on HTTP errors (4xx/5xx) instead of saving the error page as
  #     the archive, so the problem is reported here and not by tar below.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

dotnet run
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>spoken_language_identification</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,19 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<RootNamespace>spoken_language_identification</RootNamespace>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -403,8 +403,8 @@ namespace SherpaOnnx
|
||||
while (*buffer != 0)
|
||||
{
|
||||
++buffer;
|
||||
length += 1;
|
||||
}
|
||||
length = (int)(buffer - (byte*)impl.Text);
|
||||
}
|
||||
|
||||
byte[] stringBuffer = new byte[length];
|
||||
@@ -496,8 +496,6 @@ namespace SherpaOnnx
|
||||
return new OfflineStream(p);
|
||||
}
|
||||
|
||||
/// You have to ensure that IsReady(stream) returns true before
|
||||
/// you call this method
|
||||
public void Decode(OfflineStream stream)
|
||||
{
|
||||
Decode(_handle.Handle, stream.Handle);
|
||||
@@ -549,4 +547,137 @@ namespace SherpaOnnx
|
||||
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
|
||||
}
|
||||
|
||||
/// <summary>
/// Whisper model settings for spoken language identification.
/// The struct is laid out sequentially because it is embedded in
/// SpokenLanguageIdentificationConfig, which is handed to the native library.
/// Do not reorder the fields.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationWhisperConfig
{
  /// <summary>
  /// Creates a config with empty model paths and TailPaddings set to -1.
  /// </summary>
  public SpokenLanguageIdentificationWhisperConfig()
  {
    TailPaddings = -1;
    Decoder = string.Empty;
    Encoder = string.Empty;
  }

  /// <summary>Path to the whisper encoder model file.</summary>
  [MarshalAs(UnmanagedType.LPStr)]
  public string Encoder;

  /// <summary>Path to the whisper decoder model file.</summary>
  [MarshalAs(UnmanagedType.LPStr)]
  public string Decoder;

  /// <summary>
  /// Tail padding setting forwarded to the native library; defaults to -1.
  /// NOTE(review): -1 presumably selects the library default - confirm
  /// against the C API.
  /// </summary>
  public int TailPaddings;
}
|
||||
|
||||
/// <summary>
/// Top-level configuration for SpokenLanguageIdentification.
/// It is passed by reference to the native library, so the layout is declared
/// explicitly (matching SpokenLanguageIdentificationWhisperConfig) and the
/// fields must not be reordered.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationConfig
{
  /// <summary>
  /// Creates a config with a default whisper section, 1 thread,
  /// Debug set to 0 and the "cpu" provider.
  /// </summary>
  public SpokenLanguageIdentificationConfig()
  {
    Whisper = new SpokenLanguageIdentificationWhisperConfig();
    NumThreads = 1;
    Debug = 0;
    Provider = "cpu";
  }

  /// <summary>Whisper model settings.</summary>
  public SpokenLanguageIdentificationWhisperConfig Whisper;

  /// <summary>Number of threads; defaults to 1.</summary>
  public int NumThreads;

  /// <summary>
  /// Debug flag forwarded to the native library; defaults to 0.
  /// NOTE(review): presumably non-zero enables debug output - confirm.
  /// </summary>
  public int Debug;

  /// <summary>Execution provider name; defaults to "cpu".</summary>
  [MarshalAs(UnmanagedType.LPStr)]
  public string Provider;
}
|
||||
|
||||
/// <summary>
/// Managed copy of a native spoken language identification result.
/// The language string is copied out of native memory in the constructor,
/// so the object does not reference the native result afterwards.
/// </summary>
public class SpokenLanguageIdentificationResult
{
  /// <summary>
  /// Reads the language string from a native result.
  /// </summary>
  /// <param name="handle">
  /// Pointer to the native result struct. If it is IntPtr.Zero, or the
  /// struct's language pointer is null, Lang is set to an empty string.
  /// </param>
  public SpokenLanguageIdentificationResult(IntPtr handle)
  {
    if (handle == IntPtr.Zero)
    {
      // Nothing to read; avoid dereferencing a null pointer.
      _lang = "";
      return;
    }

    Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
    if (impl.Lang == IntPtr.Zero)
    {
      _lang = "";
      return;
    }

    // PtrToStringUTF8() requires .net standard 2.1, so the length of the
    // NUL-terminated UTF-8 string is measured by hand and the bytes are
    // decoded explicitly.
    int length = 0;
    while (Marshal.ReadByte(impl.Lang, length) != 0)
    {
      ++length;
    }

    byte[] stringBuffer = new byte[length];
    Marshal.Copy(impl.Lang, stringBuffer, 0, length);
    _lang = Encoding.UTF8.GetString(stringBuffer);
  }

  // Mirrors the native result struct: a single pointer to a
  // NUL-terminated UTF-8 string.
  [StructLayout(LayoutKind.Sequential)]
  struct Impl
  {
    public IntPtr Lang;
  }

  private String _lang;

  /// <summary>The detected language as returned by the native library.</summary>
  public String Lang => _lang;
}
|
||||
|
||||
/// <summary>
/// Managed wrapper around the native spoken language identification object.
/// It owns a native handle that is released by Dispose() or, failing that,
/// by the finalizer.
/// </summary>
public class SpokenLanguageIdentification : IDisposable
{
  /// <summary>
  /// Creates the native object from the given configuration.
  /// </summary>
  /// <param name="config">Model and runtime settings passed to the native library.</param>
  public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
  {
    IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
    _handle = new HandleRef(this, h);
  }

  /// <summary>
  /// Creates a stream that accepts the audio to be identified.
  /// </summary>
  /// <returns>A new OfflineStream wrapping the native stream pointer.</returns>
  public OfflineStream CreateStream()
  {
    IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
    return new OfflineStream(p);
  }

  /// <summary>
  /// Runs language identification on the audio held by the stream.
  /// </summary>
  /// <param name="stream">A stream obtained from CreateStream().</param>
  /// <returns>A managed copy of the native result.</returns>
  public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
  {
    IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
    try
    {
      // The result copies what it needs, so the native object can be
      // released as soon as the constructor returns.
      return new SpokenLanguageIdentificationResult(h);
    }
    finally
    {
      // Release the native result even if building the managed copy throws.
      if (h != IntPtr.Zero)
      {
        SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
      }
    }
  }

  /// <summary>
  /// Releases the native object. Calling it more than once is harmless.
  /// </summary>
  public void Dispose()
  {
    Cleanup();
    // Prevent the object from being placed on the
    // finalization queue
    System.GC.SuppressFinalize(this);
  }

  ~SpokenLanguageIdentification()
  {
    Cleanup();
  }

  // Destroys the native object once and clears the handle.
  private void Cleanup()
  {
    if (_handle.Handle != IntPtr.Zero)
    {
      SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);
    }

    // Don't permit the handle to be used again.
    _handle = new HandleRef(this, IntPtr.Zero);
  }

  private HandleRef _handle;

  [DllImport(Dll.Filename)]
  private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);

  [DllImport(Dll.Filename)]
  private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);

  [DllImport(Dll.Filename)]
  private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);

  [DllImport(Dll.Filename)]
  private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);

  [DllImport(Dll.Filename)]
  private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user