Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)

Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and setter method on the offline recognizer.

- Define Canary model config in Pascal, Go, C#, Dart and update converter functions
- Add SetConfig API for offline recognizer (Pascal, Go, C#, Dart)
- Extend CI/workflows and example scripts to test non-streaming Canary decoding
This commit is contained in:
Fangjun Kuang
2025-07-10 14:53:33 +08:00
committed by GitHub
parent e2b2d5ea57
commit fd9a687ec2
27 changed files with 779 additions and 8 deletions

View File

@@ -0,0 +1,44 @@
// Copyright (c) 2025 Xiaomi Corporation
//
// This file shows how to use a NeMo Canary model for speech recognition.
//
// You can find the model doc at
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
using SherpaOnnx;
class NonStreamingAsrCanary
{
static void Main(string[] args)
{
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
var config = new OfflineRecognizerConfig();
config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
config.ModelConfig.Canary.SrcLang = "en";
config.ModelConfig.Canary.TgtLang = "en";
config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
config.ModelConfig.Debug = 0;
var recognizer = new OfflineRecognizer(config);
var testWaveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
var reader = new WaveReader(testWaveFilename);
var stream = recognizer.CreateStream();
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
recognizer.Decode(stream);
var text = stream.Result.Text;
Console.WriteLine("Text (English): {0}", text);
// Now output text in German
config.ModelConfig.Canary.TgtLang = "de";
recognizer.SetConfig(config);
stream = recognizer.CreateStream();
stream.AcceptWaveform(reader.SampleRate, reader.Samples);
recognizer.Decode(stream);
text = stream.Result.Text;
Console.WriteLine("Text (German): {0}", text);
}
}

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>non_streaming_canary_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -ex
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi
dotnet run

View File

@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -117,6 +119,10 @@ Global
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE