Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)

Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and setter method on the offline recognizer.

- Define Canary model config in Pascal, Go, C#, Dart and update converter functions
- Add SetConfig API for offline recognizer (Pascal, Go, C#, Dart)
- Extend CI/workflows and example scripts to test non-streaming Canary decoding
This commit is contained in:
Fangjun Kuang
2025-07-10 14:53:33 +08:00
committed by GitHub
parent e2b2d5ea57
commit fd9a687ec2
27 changed files with 779 additions and 8 deletions

View File

@@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl {
SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
SHERPA_ONNX_READ_META_DATA_STR(normalize_type_, "normalize_type");
SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
"normalize_type");
SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");

View File

@@ -299,6 +299,16 @@ type
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
end;
TSherpaOnnxOfflineCanaryModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
SrcLang: AnsiString;
TgtLang: AnsiString;
UsePnc: Boolean;
function ToString: AnsiString;
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
end;
TSherpaOnnxOfflineMoonshineModelConfig = record
Preprocessor: AnsiString;
Encoder: AnsiString;
@@ -352,6 +362,7 @@ type
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
Canary: TSherpaOnnxOfflineCanaryModelConfig;
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
function ToString: AnsiString;
end;
@@ -398,6 +409,7 @@ type
destructor Destroy; override;
function CreateStream: TSherpaOnnxOfflineStream;
procedure Decode(Stream: TSherpaOnnxOfflineStream);
procedure SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
property GetHandle: Pointer Read Handle;
@@ -742,6 +754,13 @@ type
Task: PAnsiChar;
TailPaddings: cint32;
end;
SherpaOnnxOfflineCanaryModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
SrcLang: PAnsiChar;
TgtLang: PAnsiChar;
UsePnc: cint32;
end;
SherpaOnnxOfflineFireRedAsrModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
@@ -783,6 +802,7 @@ type
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
Canary: SherpaOnnxOfflineCanaryModelConfig;
end;
SherpaOnnxOfflineRecognizerConfig = record
@@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxOfflineRecognizerSetConfig(Recognizer: Pointer; Config: PSherpaOnnxOfflineRecognizerConfig); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
external SherpaOnnxLibName;
@@ -1564,6 +1587,19 @@ begin
[Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
end;
function TSherpaOnnxOfflineCanaryModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineCanaryModelConfig(' +
'Encoder := %s, ' +
'Decoder := %s, ' +
'SrcLang := %s, ' +
'TgtLang := %s, ' +
'UsePnc := %s' +
')',
[Self.Encoder, Self.Decoder, Self.SrcLang,
Self.TgtLang, Self.UsePnc.ToString]);
end;
function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
@@ -1627,14 +1663,16 @@ begin
'Moonshine := %s, ' +
'FireRedAsr := %s, ' +
'Dolphin := %s, ' +
'ZipformerCtc := %s' +
'ZipformerCtc := %s, ' +
'Canary := %s' +
')',
[Self.Transducer.ToString, Self.Paraformer.ToString,
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
Self.FireRedAsr.ToString, Self.Dolphin.ToString, Self.ZipformerCtc.ToString
Self.FireRedAsr.ToString, Self.Dolphin.ToString,
Self.ZipformerCtc.ToString, Self.Canary.ToString
]);
end;
@@ -1660,7 +1698,7 @@ begin
]);
end;
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
function ConvertOfflineRecognizerConfig(Config: TSherpaOnnxOfflineRecognizerConfig): SherpaOnnxOfflineRecognizerConfig;
var
C: SherpaOnnxOfflineRecognizerConfig;
begin
@@ -1707,6 +1745,12 @@ begin
C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);
C.ModelConfig.Canary.Encoder := PAnsiChar(Config.ModelConfig.Canary.Encoder);
C.ModelConfig.Canary.Decoder := PAnsiChar(Config.ModelConfig.Canary.Decoder);
C.ModelConfig.Canary.SrcLang := PAnsiChar(Config.ModelConfig.Canary.SrcLang);
C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
C.LMConfig.Scale := Config.LMConfig.Scale;
@@ -1722,10 +1766,27 @@ begin
C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);
Result := C;
end;
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
C: SherpaOnnxOfflineRecognizerConfig;
begin
C := ConvertOfflineRecognizerConfig(Config);
Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
Self._Config := Config;
end;
procedure TSherpaOnnxOfflineRecognizer.SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
var
C: SherpaOnnxOfflineRecognizerConfig;
begin
C := ConvertOfflineRecognizerConfig(Config);
SherpaOnnxOfflineRecognizerSetConfig(Self.Handle, @C);
{ We don't update Self._Config }
end;
destructor TSherpaOnnxOfflineRecognizer.Destroy;
begin
SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
@@ -1912,6 +1973,13 @@ begin
Dest.TailPaddings := -1;
end;
class operator TSherpaOnnxOfflineCanaryModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
begin
Dest.SrcLang := 'en';
Dest.TgtLang := 'en';
Dest.UsePnc := True;
end;
class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
begin
Dest.Scale := 1.0;