Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and a SetConfig setter method on the offline recognizer.

- Define the Canary model config in Pascal, Go, C#, and Dart, and update the converter functions
- Add a SetConfig API for the offline recognizer (Pascal, Go, C#, Dart)
- Extend CI workflows and example scripts to test non-streaming Canary decoding
108 lines
3.1 KiB
ObjectPascal
108 lines
3.1 KiB
ObjectPascal
{ Copyright (c) 2025 Xiaomi Corporation }

{
This file shows how to use a non-streaming NeMo Canary model
to decode files.

The same wave file is decoded twice with a single recognizer: first with
the target language set to 'en', then, after changing the target language
to 'de' and calling SetConfig on the recognizer, a second time.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

{ Prints the timing summary of one decoding pass.

  NumThreads - thread count taken from the model configuration
  Elapsed    - wall-clock time of the pass, in seconds
  Duration   - length of the decoded audio, in seconds; must be non-zero
               because the real-time factor is Elapsed / Duration }
procedure PrintTiming(NumThreads: Integer; Elapsed: Single; Duration: Single);
var
  RealTimeFactor: Single;
begin
  RealTimeFactor := Elapsed / Duration;

  WriteLn(Format('NumThreads %d', [NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
end;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Canary.Encoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  { Without samples or with a zero sample rate, the duration and the
    real-time factor computed below would divide by zero, so stop early
    with a clear message instead. }
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read wave file: %s', [WaveFilename]));
    Halt(1);
  end;

  { The audio is the same for both passes, so its length is computed once. }
  Duration := Length(Wave.Samples) / Wave.SampleRate;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := nil;
  try
    { ---- First pass: target language 'en' ---- }
    Stream := Recognizer.CreateStream();
    Start := Now;

    Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
    Recognizer.Decode(Stream);

    RecognitionResult := Recognizer.GetResult(Stream);

    Stop := Now;

    Elapsed := MilliSecondsBetween(Stop, Start) / 1000;

    WriteLn(RecognitionResult.ToString);
    PrintTiming(Config.ModelConfig.NumThreads, Elapsed, Duration);

    { Each pass uses its own stream; release the first one before the
      second is created. }
    FreeAndNil(Stream);

    WriteLn('-----------Output German-----');

    { ---- Second pass: same recognizer, target language 'de' ---- }
    Stream := Recognizer.CreateStream();
    Start := Now;

    Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

    { Change the target language and hand the updated configuration to the
      existing recognizer instead of creating a new one. }
    Config.ModelConfig.Canary.TgtLang := 'de';
    Recognizer.SetConfig(Config);
    Recognizer.Decode(Stream);

    RecognitionResult := Recognizer.GetResult(Stream);

    Stop := Now;

    Elapsed := MilliSecondsBetween(Stop, Start) / 1000;

    WriteLn(RecognitionResult.ToString);
    PrintTiming(Config.ModelConfig.NumThreads, Elapsed, Duration);
  finally
    { Free resources to avoid memory leak.

      Note: You don't strictly need to free them in a short script like this
      one, but you have to do so in your own large/complex project. The
      finally section makes sure they are released even if an exception is
      raised while decoding. }
    FreeAndNil(Stream);
    FreeAndNil(Recognizer);
  end;
end.
|