Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and a setter method on the offline recognizer.
- Define the Canary model config in Pascal, Go, C#, and Dart, and update the converter functions
- Add a SetConfig API for the offline recognizer (Pascal, Go, C#, Dart)
- Extend CI workflows and example scripts to test non-streaming Canary decoding
This commit is contained in:
5
.github/scripts/test-dot-net.sh
vendored
5
.github/scripts/test-dot-net.sh
vendored
@@ -6,6 +6,11 @@ cd ./version-test
|
|||||||
./run.sh
|
./run.sh
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
cd ../non-streaming-canary-decode-files
|
||||||
|
./run.sh
|
||||||
|
ls -lh
|
||||||
|
rm -rf sherpa-onnx-nemo-*
|
||||||
|
|
||||||
cd ../offline-decode-files
|
cd ../offline-decode-files
|
||||||
|
|
||||||
./run-zipformer-ctc.sh
|
./run-zipformer-ctc.sh
|
||||||
|
|||||||
4
.github/workflows/pascal.yaml
vendored
4
.github/workflows/pascal.yaml
vendored
@@ -156,6 +156,10 @@ jobs:
|
|||||||
|
|
||||||
pushd non-streaming-asr
|
pushd non-streaming-asr
|
||||||
|
|
||||||
|
./run-nemo-canary.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
./run-zipformer-ctc.sh
|
./run-zipformer-ctc.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
echo "---"
|
echo "---"
|
||||||
|
|||||||
8
.github/workflows/test-go-package.yaml
vendored
8
.github/workflows/test-go-package.yaml
vendored
@@ -76,6 +76,14 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
gcc --version
|
gcc --version
|
||||||
|
|
||||||
|
- name: Test NeMo Canary ASR
|
||||||
|
if: matrix.os != 'windows-latest'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd go-api-examples/non-streaming-canary-decode-files
|
||||||
|
./run.sh
|
||||||
|
rm -rf sherpa-onnx-nemo-*
|
||||||
|
|
||||||
- name: Test speech enhancement (GTCRN)
|
- name: Test speech enhancement (GTCRN)
|
||||||
if: matrix.os != 'windows-latest'
|
if: matrix.os != 'windows-latest'
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
14
.github/workflows/test-go.yaml
vendored
14
.github/workflows/test-go.yaml
vendored
@@ -108,6 +108,7 @@ jobs:
|
|||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
|
||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
|
||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
|
||||||
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
|
||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
|
||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
|
||||||
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
|
cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
|
||||||
@@ -148,6 +149,19 @@ jobs:
|
|||||||
name: ${{ matrix.os }}-libs
|
name: ${{ matrix.os }}-libs
|
||||||
path: to-upload/
|
path: to-upload/
|
||||||
|
|
||||||
|
- name: Test non-streaming decoding files with NeMo Canary
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd scripts/go/_internal/non-streaming-canary-decode-files/
|
||||||
|
ls -lh
|
||||||
|
go mod tidy
|
||||||
|
cat go.mod
|
||||||
|
go build
|
||||||
|
ls -lh
|
||||||
|
|
||||||
|
./run.sh
|
||||||
|
rm -rf sherpa-onnx-nemo-*
|
||||||
|
|
||||||
- name: Test streaming decoding files
|
- name: Test streaming decoding files
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
84
dart-api-examples/non-streaming-asr/bin/nemo-canary.dart
Normal file
84
dart-api-examples/non-streaming-asr/bin/nemo-canary.dart
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
import 'dart:io';
|
||||||
|
|
||||||
|
import 'package:args/args.dart';
|
||||||
|
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||||
|
|
||||||
|
import './init.dart';
|
||||||
|
|
||||||
|
/// Transcribes a wave file with a NeMo Canary model.
///
/// Required options: `--encoder`, `--decoder`, `--src-lang`, `--tgt-lang`,
/// `--tokens` and `--input-wav`. If any of them is missing, the usage text is
/// printed and the process exits with status 1.
///
/// When the requested target language is not English, the same audio is
/// decoded a second time after switching the target language to English via
/// `recognizer.setConfig`, to show that the language can be changed without
/// re-creating the recognizer.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
    ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
    ..addOption('src-lang', help: 'Language of the input audio')
    ..addOption('tgt-lang', help: 'Language of the recognition result')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['src-lang'] == null ||
      res['tgt-lang'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final srcLang = res['src-lang'] as String;
  final tgtLang = res['tgt-lang'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final canary = sherpa_onnx.OfflineCanaryModelConfig(
      encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    canary: canary,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print('Result in $tgtLang: ${result.text}');

  stream.free();

  // Example of changing the target language at runtime: if the first pass
  // did not already produce English, decode the same audio again with
  // English as the target language.
  if (tgtLang != 'en') {
    // The config classes are immutable, so round-trip through JSON to get an
    // editable copy and rebuild the config from it.
    final json = config.toJson();
    final modelJson = json['model'] as Map<String, dynamic>;
    final canaryJson = modelJson['canary'] as Map<String, dynamic>;
    canaryJson['tgtLang'] = 'en';

    config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
    recognizer.setConfig(config);

    final stream = recognizer.createStream();

    stream.acceptWaveform(
        samples: waveData.samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    print('Result in English: ${result.text}');

    stream.free();
  }

  recognizer.free();
}
|
||||||
33
dart-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
33
dart-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
@@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Runs the Dart NeMo Canary example.
#
# The int8 Canary model archive is downloaded and unpacked on first use.
# The English test wave is then decoded into en/de/es/fr, and the German test
# wave into en/de.

set -ex

dart pub get

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

# English audio -> each supported target language
for tgt_lang in en de es fr; do
  dart run \
    ./bin/nemo-canary.dart \
    --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
    --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
    --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
    --src-lang en \
    --tgt-lang "$tgt_lang" \
    --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav
done

# German audio -> English and German
for tgt_lang in en de; do
  dart run \
    ./bin/nemo-canary.dart \
    --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
    --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
    --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
    --src-lang de \
    --tgt-lang "$tgt_lang" \
    --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav
done
|
||||||
44
dotnet-examples/non-streaming-canary-decode-files/Program.cs
Normal file
44
dotnet-examples/non-streaming-canary-decode-files/Program.cs
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// This file shows how to use a NeMo Canary model for speech recognition.
|
||||||
|
//
|
||||||
|
// You can find the model doc at
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
|
||||||
|
using SherpaOnnx;
|
||||||
|
|
||||||
|
class NonStreamingAsrCanary
{
  // Decodes the bundled English test wave twice with a NeMo Canary model:
  // first with English as the target language, then with German after the
  // recognizer has been reconfigured through SetConfig.
  static void Main(string[] args)
  {
    // Model files can be downloaded from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
    config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
    config.ModelConfig.Canary.SrcLang = "en";
    config.ModelConfig.Canary.TgtLang = "en";
    config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
    config.ModelConfig.Debug = 0;

    var recognizer = new OfflineRecognizer(config);
    var reader = new WaveReader("./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav");

    // Runs one full decode of the test wave on a fresh stream and returns
    // the recognized text.
    string Transcribe()
    {
      var s = recognizer.CreateStream();
      s.AcceptWaveform(reader.SampleRate, reader.Samples);
      recognizer.Decode(s);
      return s.Result.Text;
    }

    Console.WriteLine("Text (English): {0}", Transcribe());

    // Switch the output language to German and decode the same audio again
    config.ModelConfig.Canary.TgtLang = "de";
    recognizer.SetConfig(config);

    Console.WriteLine("Text (German): {0}", Transcribe());
  }
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<RootNamespace>non_streaming_canary_decode_files</RootNamespace>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Common\Common.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
11
dotnet-examples/non-streaming-canary-decode-files/run.sh
Executable file
11
dotnet-examples/non-streaming-canary-decode-files/run.sh
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Downloads the int8 NeMo Canary model on first use, then runs the example.

set -ex

model=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

if ! [ -f "$model/encoder.int8.onnx" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

dotnet run
|
||||||
@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -117,6 +119,10 @@ Global
|
|||||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
|
{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
@@ -163,6 +163,44 @@ class OfflineWhisperModelConfig {
|
|||||||
final int tailPaddings;
|
final int tailPaddings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Configuration for a non-streaming NeMo Canary model.
class OfflineCanaryModelConfig {
  const OfflineCanaryModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.srcLang = 'en',
    this.tgtLang = 'en',
    this.usePnc = true,
  });

  /// Builds a config from [json]. Keys that are absent or null fall back to
  /// the same defaults as the unnamed constructor.
  factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) =>
      OfflineCanaryModelConfig(
        encoder: (json['encoder'] as String?) ?? '',
        decoder: (json['decoder'] as String?) ?? '',
        srcLang: (json['srcLang'] as String?) ?? 'en',
        tgtLang: (json['tgtLang'] as String?) ?? 'en',
        usePnc: (json['usePnc'] as bool?) ?? true,
      );

  /// Encoder model path.
  final String encoder;

  /// Decoder model path.
  final String decoder;

  /// Source language code; defaults to 'en'.
  final String srcLang;

  /// Target language code; defaults to 'en'.
  final String tgtLang;

  /// Forwarded to the native config as 1/0.
  /// NOTE(review): presumably toggles punctuation and capitalization - confirm.
  final bool usePnc;

  /// Serializes every field under the key names that [fromJson] reads.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'encoder': encoder,
      'decoder': decoder,
      'srcLang': srcLang,
      'tgtLang': tgtLang,
      'usePnc': usePnc,
    };
  }

  @override
  String toString() =>
      'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, '
      'srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)';
}
|
||||||
|
|
||||||
class OfflineFireRedAsrModelConfig {
|
class OfflineFireRedAsrModelConfig {
|
||||||
const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});
|
const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});
|
||||||
|
|
||||||
@@ -310,6 +348,7 @@ class OfflineModelConfig {
|
|||||||
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
|
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
|
||||||
this.dolphin = const OfflineDolphinModelConfig(),
|
this.dolphin = const OfflineDolphinModelConfig(),
|
||||||
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
|
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
|
||||||
|
this.canary = const OfflineCanaryModelConfig(),
|
||||||
required this.tokens,
|
required this.tokens,
|
||||||
this.numThreads = 1,
|
this.numThreads = 1,
|
||||||
this.debug = true,
|
this.debug = true,
|
||||||
@@ -362,6 +401,10 @@ class OfflineModelConfig {
|
|||||||
? OfflineZipformerCtcModelConfig.fromJson(
|
? OfflineZipformerCtcModelConfig.fromJson(
|
||||||
json['zipformerCtc'] as Map<String, dynamic>)
|
json['zipformerCtc'] as Map<String, dynamic>)
|
||||||
: const OfflineZipformerCtcModelConfig(),
|
: const OfflineZipformerCtcModelConfig(),
|
||||||
|
canary: json['canary'] != null
|
||||||
|
? OfflineCanaryModelConfig.fromJson(
|
||||||
|
json['canary'] as Map<String, dynamic>)
|
||||||
|
: const OfflineCanaryModelConfig(),
|
||||||
tokens: json['tokens'] as String,
|
tokens: json['tokens'] as String,
|
||||||
numThreads: json['numThreads'] as int? ?? 1,
|
numThreads: json['numThreads'] as int? ?? 1,
|
||||||
debug: json['debug'] as bool? ?? true,
|
debug: json['debug'] as bool? ?? true,
|
||||||
@@ -375,7 +418,7 @@ class OfflineModelConfig {
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
String toString() {
|
String toString() {
|
||||||
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, dynamic> toJson() => {
|
Map<String, dynamic> toJson() => {
|
||||||
@@ -389,6 +432,7 @@ class OfflineModelConfig {
|
|||||||
'fireRedAsr': fireRedAsr.toJson(),
|
'fireRedAsr': fireRedAsr.toJson(),
|
||||||
'dolphin': dolphin.toJson(),
|
'dolphin': dolphin.toJson(),
|
||||||
'zipformerCtc': zipformerCtc.toJson(),
|
'zipformerCtc': zipformerCtc.toJson(),
|
||||||
|
'canary': canary.toJson(),
|
||||||
'tokens': tokens,
|
'tokens': tokens,
|
||||||
'numThreads': numThreads,
|
'numThreads': numThreads,
|
||||||
'debug': debug,
|
'debug': debug,
|
||||||
@@ -409,6 +453,7 @@ class OfflineModelConfig {
|
|||||||
final OfflineFireRedAsrModelConfig fireRedAsr;
|
final OfflineFireRedAsrModelConfig fireRedAsr;
|
||||||
final OfflineDolphinModelConfig dolphin;
|
final OfflineDolphinModelConfig dolphin;
|
||||||
final OfflineZipformerCtcModelConfig zipformerCtc;
|
final OfflineZipformerCtcModelConfig zipformerCtc;
|
||||||
|
final OfflineCanaryModelConfig canary;
|
||||||
|
|
||||||
final String tokens;
|
final String tokens;
|
||||||
final int numThreads;
|
final int numThreads;
|
||||||
@@ -549,7 +594,28 @@ class OfflineRecognizer {
|
|||||||
|
|
||||||
/// The user is responsible to call the OfflineRecognizer.free()
|
/// The user is responsible to call the OfflineRecognizer.free()
|
||||||
/// method of the returned instance to avoid memory leak.
|
/// method of the returned instance to avoid memory leak.
|
||||||
|
|
||||||
factory OfflineRecognizer(OfflineRecognizerConfig config) {
|
factory OfflineRecognizer(OfflineRecognizerConfig config) {
  // Marshal the Dart config into a native struct; it is released again as
  // soon as the create call has returned.
  final nativeConfig = convertConfig(config);

  // Fall back to a null pointer when the binding has not been resolved.
  final create = SherpaOnnxBindings.createOfflineRecognizer;
  final handle = create == null ? nullptr : create(nativeConfig);

  freeConfig(nativeConfig);

  return OfflineRecognizer._(ptr: handle, config: config);
}
|
||||||
|
|
||||||
|
/// Pushes [config] to the already-created native recognizer.
///
/// The `config` stored on this Dart object is left unchanged.
void setConfig(OfflineRecognizerConfig config) {
  final nativeConfig = convertConfig(config);

  // Nothing is applied when the binding has not been resolved.
  final apply = SherpaOnnxBindings.offlineRecognizerSetConfig;
  if (apply != null) {
    apply(ptr, nativeConfig);
  }

  freeConfig(nativeConfig);
  // we don't update this.config
}
|
||||||
|
|
||||||
|
static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig(
|
||||||
|
OfflineRecognizerConfig config) {
|
||||||
final c = calloc<SherpaOnnxOfflineRecognizerConfig>();
|
final c = calloc<SherpaOnnxOfflineRecognizerConfig>();
|
||||||
|
|
||||||
c.ref.feat.sampleRate = config.feat.sampleRate;
|
c.ref.feat.sampleRate = config.feat.sampleRate;
|
||||||
@@ -609,6 +675,12 @@ class OfflineRecognizer {
|
|||||||
c.ref.model.zipformerCtc.model =
|
c.ref.model.zipformerCtc.model =
|
||||||
config.model.zipformerCtc.model.toNativeUtf8();
|
config.model.zipformerCtc.model.toNativeUtf8();
|
||||||
|
|
||||||
|
c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8();
|
||||||
|
c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8();
|
||||||
|
c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8();
|
||||||
|
c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
|
||||||
|
c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
|
||||||
|
|
||||||
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
|
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
|
||||||
|
|
||||||
c.ref.model.numThreads = config.model.numThreads;
|
c.ref.model.numThreads = config.model.numThreads;
|
||||||
@@ -637,8 +709,10 @@ class OfflineRecognizer {
|
|||||||
c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
|
c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
|
||||||
c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();
|
c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();
|
||||||
|
|
||||||
final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) {
|
||||||
calloc.free(c.ref.hr.dictDir);
|
calloc.free(c.ref.hr.dictDir);
|
||||||
calloc.free(c.ref.hr.lexicon);
|
calloc.free(c.ref.hr.lexicon);
|
||||||
calloc.free(c.ref.hr.ruleFsts);
|
calloc.free(c.ref.hr.ruleFsts);
|
||||||
@@ -653,6 +727,10 @@ class OfflineRecognizer {
|
|||||||
calloc.free(c.ref.model.modelType);
|
calloc.free(c.ref.model.modelType);
|
||||||
calloc.free(c.ref.model.provider);
|
calloc.free(c.ref.model.provider);
|
||||||
calloc.free(c.ref.model.tokens);
|
calloc.free(c.ref.model.tokens);
|
||||||
|
calloc.free(c.ref.model.canary.tgtLang);
|
||||||
|
calloc.free(c.ref.model.canary.srcLang);
|
||||||
|
calloc.free(c.ref.model.canary.decoder);
|
||||||
|
calloc.free(c.ref.model.canary.encoder);
|
||||||
calloc.free(c.ref.model.zipformerCtc.model);
|
calloc.free(c.ref.model.zipformerCtc.model);
|
||||||
calloc.free(c.ref.model.dolphin.model);
|
calloc.free(c.ref.model.dolphin.model);
|
||||||
calloc.free(c.ref.model.fireRedAsr.decoder);
|
calloc.free(c.ref.model.fireRedAsr.decoder);
|
||||||
@@ -674,8 +752,6 @@ class OfflineRecognizer {
|
|||||||
calloc.free(c.ref.model.transducer.decoder);
|
calloc.free(c.ref.model.transducer.decoder);
|
||||||
calloc.free(c.ref.model.transducer.joiner);
|
calloc.free(c.ref.model.transducer.joiner);
|
||||||
calloc.free(c);
|
calloc.free(c);
|
||||||
|
|
||||||
return OfflineRecognizer._(ptr: ptr, config: config);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The user has to invoke stream.free() on the returned instance
|
/// The user has to invoke stream.free() on the returned instance
|
||||||
|
|||||||
@@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
|
|||||||
external int tailPaddings;
|
external int tailPaddings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final class SherpaOnnxOfflineCanaryModelConfig extends Struct {
|
||||||
|
external Pointer<Utf8> encoder;
|
||||||
|
external Pointer<Utf8> decoder;
|
||||||
|
external Pointer<Utf8> srcLang;
|
||||||
|
external Pointer<Utf8> tgtLang;
|
||||||
|
|
||||||
|
@Int32()
|
||||||
|
external int usePnc;
|
||||||
|
}
|
||||||
|
|
||||||
final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
|
final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
|
||||||
external Pointer<Utf8> preprocessor;
|
external Pointer<Utf8> preprocessor;
|
||||||
external Pointer<Utf8> encoder;
|
external Pointer<Utf8> encoder;
|
||||||
@@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
|
|||||||
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
|
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
|
||||||
external SherpaOnnxOfflineDolphinModelConfig dolphin;
|
external SherpaOnnxOfflineDolphinModelConfig dolphin;
|
||||||
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
|
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
|
||||||
|
external SherpaOnnxOfflineCanaryModelConfig canary;
|
||||||
}
|
}
|
||||||
|
|
||||||
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
|
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
|
||||||
@@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
|
|||||||
|
|
||||||
typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;
|
typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;
|
||||||
|
|
||||||
|
typedef OfflineRecognizerSetConfigNative = Void Function(
|
||||||
|
Pointer<SherpaOnnxOfflineRecognizer>,
|
||||||
|
Pointer<SherpaOnnxOfflineRecognizerConfig>);
|
||||||
|
|
||||||
|
typedef OfflineRecognizerSetConfig = void Function(
|
||||||
|
Pointer<SherpaOnnxOfflineRecognizer>,
|
||||||
|
Pointer<SherpaOnnxOfflineRecognizerConfig>);
|
||||||
|
|
||||||
typedef DestroyOfflineRecognizerNative = Void Function(
|
typedef DestroyOfflineRecognizerNative = Void Function(
|
||||||
Pointer<SherpaOnnxOfflineRecognizer>);
|
Pointer<SherpaOnnxOfflineRecognizer>);
|
||||||
|
|
||||||
@@ -1341,6 +1360,7 @@ class SherpaOnnxBindings {
|
|||||||
|
|
||||||
static CreateOfflineRecognizer? createOfflineRecognizer;
|
static CreateOfflineRecognizer? createOfflineRecognizer;
|
||||||
static DestroyOfflineRecognizer? destroyOfflineRecognizer;
|
static DestroyOfflineRecognizer? destroyOfflineRecognizer;
|
||||||
|
static OfflineRecognizerSetConfig? offlineRecognizerSetConfig;
|
||||||
static CreateOfflineStream? createOfflineStream;
|
static CreateOfflineStream? createOfflineStream;
|
||||||
static DestroyOfflineStream? destroyOfflineStream;
|
static DestroyOfflineStream? destroyOfflineStream;
|
||||||
static AcceptWaveformOffline? acceptWaveformOffline;
|
static AcceptWaveformOffline? acceptWaveformOffline;
|
||||||
@@ -1741,6 +1761,11 @@ class SherpaOnnxBindings {
|
|||||||
'SherpaOnnxDestroyOfflineRecognizer')
|
'SherpaOnnxDestroyOfflineRecognizer')
|
||||||
.asFunction();
|
.asFunction();
|
||||||
|
|
||||||
|
offlineRecognizerSetConfig ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<OfflineRecognizerSetConfigNative>>(
|
||||||
|
'SherpaOnnxOfflineRecognizerSetConfig')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
createOfflineStream ??= dynamicLibrary
|
createOfflineStream ??= dynamicLibrary
|
||||||
.lookup<NativeFunction<CreateOfflineStreamNative>>(
|
.lookup<NativeFunction<CreateOfflineStreamNative>>(
|
||||||
'SherpaOnnxCreateOfflineStream')
|
'SherpaOnnxCreateOfflineStream')
|
||||||
|
|||||||
17
go-api-examples/non-streaming-canary-decode-files/go.mod
Normal file
17
go-api-examples/non-streaming-canary-decode-files/go.mod
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
module non-streaming-canary-decode-files
|
||||||
|
|
||||||
|
go 1.17
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/k2-fsa/sherpa-onnx-go v1.12.4
|
||||||
|
github.com/spf13/pflag v1.0.6
|
||||||
|
github.com/youpy/go-wav v0.3.2
|
||||||
|
)
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect
|
||||||
|
github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect
|
||||||
|
github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect
|
||||||
|
github.com/youpy/go-riff v0.1.0 // indirect
|
||||||
|
github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
|
||||||
|
)
|
||||||
113
go-api-examples/non-streaming-canary-decode-files/main.go
Normal file
113
go-api-examples/non-streaming-canary-decode-files/main.go
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
|
||||||
|
"github.com/youpy/go-wav"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
|
||||||
|
|
||||||
|
config := sherpa.OfflineRecognizerConfig{}
|
||||||
|
|
||||||
|
config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
|
||||||
|
config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
|
||||||
|
config.ModelConfig.Canary.SrcLang = "en"
|
||||||
|
config.ModelConfig.Canary.TgtLang = "en"
|
||||||
|
config.ModelConfig.Canary.UsePnc = 1
|
||||||
|
config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"
|
||||||
|
|
||||||
|
waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"
|
||||||
|
|
||||||
|
samples, sampleRate := readWave(waveFilename)
|
||||||
|
|
||||||
|
log.Println("Initializing recognizer (may take several seconds)")
|
||||||
|
recognizer := sherpa.NewOfflineRecognizer(&config)
|
||||||
|
log.Println("Recognizer created!")
|
||||||
|
defer sherpa.DeleteOfflineRecognizer(recognizer)
|
||||||
|
|
||||||
|
log.Println("Start decoding!")
|
||||||
|
stream := sherpa.NewOfflineStream(recognizer)
|
||||||
|
defer sherpa.DeleteOfflineStream(stream)
|
||||||
|
|
||||||
|
stream.AcceptWaveform(sampleRate, samples)
|
||||||
|
|
||||||
|
recognizer.Decode(stream)
|
||||||
|
log.Println("Decoding done!")
|
||||||
|
result := stream.GetResult()
|
||||||
|
|
||||||
|
log.Println("Text in English: " + strings.ToLower(result.Text))
|
||||||
|
|
||||||
|
s := sherpa.NewOfflineStream(recognizer)
|
||||||
|
defer sherpa.DeleteOfflineStream(s)
|
||||||
|
|
||||||
|
s.AcceptWaveform(sampleRate, samples)
|
||||||
|
|
||||||
|
config.ModelConfig.Canary.TgtLang = "de"
|
||||||
|
recognizer.SetConfig(&config)
|
||||||
|
recognizer.Decode(s)
|
||||||
|
result = s.GetResult()
|
||||||
|
|
||||||
|
log.Println("Text in German: " + strings.ToLower(result.Text))
|
||||||
|
}
|
||||||
|
|
||||||
|
func readWave(filename string) (samples []float32, sampleRate int) {
|
||||||
|
file, _ := os.Open(filename)
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
reader := wav.NewReader(file)
|
||||||
|
format, err := reader.Format()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Failed to read wave format")
|
||||||
|
}
|
||||||
|
|
||||||
|
if format.AudioFormat != 1 {
|
||||||
|
log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
|
||||||
|
}
|
||||||
|
|
||||||
|
if format.NumChannels != 1 {
|
||||||
|
log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
|
||||||
|
}
|
||||||
|
|
||||||
|
if format.BitsPerSample != 16 {
|
||||||
|
log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.Duration() // so that it initializes reader.Size
|
||||||
|
|
||||||
|
buf := make([]byte, reader.Size)
|
||||||
|
n, err := reader.Read(buf)
|
||||||
|
if n != int(reader.Size) {
|
||||||
|
log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
samples = samplesInt16ToFloat(buf)
|
||||||
|
sampleRate = int(format.SampleRate)
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// samplesInt16ToFloat converts raw little-endian signed 16-bit samples into
// float32 values by dividing each sample by 32768, so every result lies in
// [-1, 1). The output has len(inSamples)/2 elements; a trailing odd byte, if
// present, is ignored. The returned slice is never nil.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)
	if numSamples == 0 {
		return outSamples
	}

	// Decode all samples with a single binary.Read instead of allocating a
	// reader and decoding one value at a time.
	pcm := make([]int16, numSamples)
	src := bytes.NewReader(inSamples[:numSamples*2])
	if err := binary.Read(src, binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	for i, v := range pcm {
		outSamples[i] = float32(v) / 32768
	}

	return outSamples
}
|
||||||
13
go-api-examples/non-streaming-canary-decode-files/run.sh
Executable file
13
go-api-examples/non-streaming-canary-decode-files/run.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/usr/bin/env bash
# Download the int8 NeMo Canary model on first use, then build and run the
# Go example located in the current directory.

set -ex

# The encoder file serves as the marker for "model already present".
if ! [ -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  # The archive is no longer needed once it has been extracted.
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

go mod tidy
go build

./non-streaming-canary-decode-files
|
||||||
@@ -10,3 +10,4 @@ telespeech_ctc
|
|||||||
moonshine
|
moonshine
|
||||||
dolphin_ctc
|
dolphin_ctc
|
||||||
zipformer_ctc
|
zipformer_ctc
|
||||||
|
nemo_canary
|
||||||
|
|||||||
107
pascal-api-examples/non-streaming-asr/nemo_canary.pas
Normal file
107
pascal-api-examples/non-streaming-asr/nemo_canary.pas
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming NeMo Canary model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  { Input audio }
  WaveFilename: AnsiString;
  Wave: TSherpaOnnxWave;

  { Recognizer objects }
  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  { Timing }
  Start, Stop: TDateTime;
  Elapsed, Duration, RealTimeFactor: Single;

begin
  Initialize(Config);

  { Model files and decoding options for the first (English) pass }
  Config.ModelConfig.Canary.Encoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);

  { ---- First pass: target language 'en' ---- }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  { Real-time factor = processing time divided by audio duration }
  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  { The first stream is released before a new one is created for the second pass }
  FreeAndNil(Stream);

  WriteLn('-----------Output German-----');

  { ---- Second pass: same audio and recognizer, target language 'de' ---- }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  { Push the changed target language to the existing recognizer before decoding }
  Config.ModelConfig.Canary.TgtLang := 'de';
  Recognizer.SetConfig(Config);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  { Release resources to avoid memory leaks.

    A short script like this one does not strictly need it, but a larger or
    more complex project has to free these objects explicitly.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
42
pascal-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-nemo-canary.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
# Build the sherpa-onnx shared C API library if it is missing, download the
# int8 NeMo Canary model on first use, then compile and run nemo_canary.pas.
#
# Note: the library check, the model download and the fpc input all use paths
# relative to the current working directory, so run this script from the
# directory that contains it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Quoted so that a checkout path containing spaces is not word-split.
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Build and install the shared library only when none of the
# platform-specific variants (macOS / Linux / Windows) is present.
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# The encoder file serves as the marker for "model already present".
if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$SHERPA_ONNX_DIR/build/install/lib" \
  ./nemo_canary.pas

# Make the freshly built shared library discoverable at run time
# (LD_LIBRARY_PATH on Linux, DYLD_LIBRARY_PATH on macOS).
export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH"

./nemo_canary
|
||||||
32
scripts/dotnet/OfflineCanaryModelConfig.cs
Normal file
32
scripts/dotnet/OfflineCanaryModelConfig.cs
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
/// Copyright (c) 2024.5 by 东风破
|
||||||
|
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace SherpaOnnx
{
    /// <summary>
    /// Settings for a non-streaming NeMo Canary model. The struct is laid out
    /// sequentially and its strings are marshalled as LPStr, so the order of
    /// the fields below must not be changed.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineCanaryModelConfig
    {
        /// <summary>Encoder model file. Defaults to an empty string.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        /// <summary>Decoder model file. Defaults to an empty string.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        /// <summary>Source language code. Defaults to "en".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string SrcLang;

        /// <summary>Target language code. Defaults to "en".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string TgtLang;

        /// <summary>
        /// Integer flag, 1 by default. Presumably enables punctuation and
        /// capitalisation in the output; confirm against the native API.
        /// </summary>
        public int UsePnc;

        /// <summary>
        /// Creates a configuration with empty model paths, English as both
        /// source and target language, and UsePnc set to 1.
        /// </summary>
        public OfflineCanaryModelConfig()
        {
            Encoder = "";
            Decoder = "";
            SrcLang = "en";
            TgtLang = "en";
            UsePnc = 1;
        }
    }
}
|
||||||
@@ -28,6 +28,7 @@ namespace SherpaOnnx
|
|||||||
FireRedAsr = new OfflineFireRedAsrModelConfig();
|
FireRedAsr = new OfflineFireRedAsrModelConfig();
|
||||||
Dolphin = new OfflineDolphinModelConfig();
|
Dolphin = new OfflineDolphinModelConfig();
|
||||||
ZipformerCtc = new OfflineZipformerCtcModelConfig();
|
ZipformerCtc = new OfflineZipformerCtcModelConfig();
|
||||||
|
Canary = new OfflineCanaryModelConfig();
|
||||||
}
|
}
|
||||||
public OfflineTransducerModelConfig Transducer;
|
public OfflineTransducerModelConfig Transducer;
|
||||||
public OfflineParaformerModelConfig Paraformer;
|
public OfflineParaformerModelConfig Paraformer;
|
||||||
@@ -62,5 +63,6 @@ namespace SherpaOnnx
|
|||||||
public OfflineFireRedAsrModelConfig FireRedAsr;
|
public OfflineFireRedAsrModelConfig FireRedAsr;
|
||||||
public OfflineDolphinModelConfig Dolphin;
|
public OfflineDolphinModelConfig Dolphin;
|
||||||
public OfflineZipformerCtcModelConfig ZipformerCtc;
|
public OfflineZipformerCtcModelConfig ZipformerCtc;
|
||||||
|
public OfflineCanaryModelConfig Canary;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ namespace SherpaOnnx
|
|||||||
_handle = new HandleRef(this, h);
|
_handle = new HandleRef(this, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Passes <paramref name="config"/> to the native recognizer behind this
/// object by calling SherpaOnnxOfflineRecognizerSetConfig on its handle.
/// </summary>
/// <param name="config">Configuration handed to the native library by reference.</param>
public void SetConfig(OfflineRecognizerConfig config)
{
    IntPtr recognizer = _handle.Handle;
    SherpaOnnxOfflineRecognizerSetConfig(recognizer, ref config);
}
|
||||||
|
|
||||||
public OfflineStream CreateStream()
|
public OfflineStream CreateStream()
|
||||||
{
|
{
|
||||||
IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
|
IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
|
||||||
@@ -65,6 +70,9 @@ namespace SherpaOnnx
|
|||||||
[DllImport(Dll.Filename)]
|
[DllImport(Dll.Filename)]
|
||||||
private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);
|
private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);
|
||||||
|
|
||||||
|
[DllImport(Dll.Filename)]
|
||||||
|
private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config);
|
||||||
|
|
||||||
[DllImport(Dll.Filename)]
|
[DllImport(Dll.Filename)]
|
||||||
private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);
|
private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
module non-streaming-canary-decode-files
|
||||||
|
|
||||||
|
go 1.17
|
||||||
|
|
||||||
|
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
|
||||||
1
scripts/go/_internal/non-streaming-canary-decode-files/main.go
Symbolic link
1
scripts/go/_internal/non-streaming-canary-decode-files/main.go
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-canary-decode-files/main.go
|
||||||
1
scripts/go/_internal/non-streaming-canary-decode-files/run.sh
Symbolic link
1
scripts/go/_internal/non-streaming-canary-decode-files/run.sh
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-canary-decode-files/run.sh
|
||||||
@@ -414,6 +414,14 @@ type OfflineWhisperModelConfig struct {
|
|||||||
TailPaddings int
|
TailPaddings int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OfflineCanaryModelConfig holds the settings of a non-streaming NeMo Canary
// model.
type OfflineCanaryModelConfig struct {
	// Encoder and Decoder are the encoder and decoder model files.
	Encoder, Decoder string

	// SrcLang and TgtLang are the source and target language codes,
	// e.g. "en" or "de".
	SrcLang, TgtLang string

	// UsePnc is an integer flag passed to the C API as an int. Presumably
	// 1 enables punctuation and capitalisation; confirm against the C API.
	UsePnc int
}
|
||||||
|
|
||||||
type OfflineFireRedAsrModelConfig struct {
|
type OfflineFireRedAsrModelConfig struct {
|
||||||
Encoder string
|
Encoder string
|
||||||
Decoder string
|
Decoder string
|
||||||
@@ -453,6 +461,7 @@ type OfflineModelConfig struct {
|
|||||||
FireRedAsr OfflineFireRedAsrModelConfig
|
FireRedAsr OfflineFireRedAsrModelConfig
|
||||||
Dolphin OfflineDolphinModelConfig
|
Dolphin OfflineDolphinModelConfig
|
||||||
ZipformerCtc OfflineZipformerCtcModelConfig
|
ZipformerCtc OfflineZipformerCtcModelConfig
|
||||||
|
Canary OfflineCanaryModelConfig
|
||||||
Tokens string // Path to tokens.txt
|
Tokens string // Path to tokens.txt
|
||||||
|
|
||||||
// Number of threads to use for neural network computation
|
// Number of threads to use for neural network computation
|
||||||
@@ -547,6 +556,12 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
|
|||||||
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
|
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
|
||||||
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
|
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
|
||||||
|
|
||||||
|
c.model_config.canary.encoder = C.CString(config.ModelConfig.Canary.Encoder)
|
||||||
|
c.model_config.canary.decoder = C.CString(config.ModelConfig.Canary.Decoder)
|
||||||
|
c.model_config.canary.src_lang = C.CString(config.ModelConfig.Canary.SrcLang)
|
||||||
|
c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang)
|
||||||
|
c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc)
|
||||||
|
|
||||||
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
||||||
|
|
||||||
c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
|
c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
|
||||||
@@ -675,6 +690,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
|
|||||||
c.model_config.zipformer_ctc.model = nil
|
c.model_config.zipformer_ctc.model = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.model_config.canary.encoder != nil {
|
||||||
|
C.free(unsafe.Pointer(c.model_config.canary.encoder))
|
||||||
|
c.model_config.canary.encoder = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.model_config.canary.decoder != nil {
|
||||||
|
C.free(unsafe.Pointer(c.model_config.canary.decoder))
|
||||||
|
c.model_config.canary.decoder = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.model_config.canary.src_lang != nil {
|
||||||
|
C.free(unsafe.Pointer(c.model_config.canary.src_lang))
|
||||||
|
c.model_config.canary.src_lang = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.model_config.canary.tgt_lang != nil {
|
||||||
|
C.free(unsafe.Pointer(c.model_config.canary.tgt_lang))
|
||||||
|
c.model_config.canary.tgt_lang = nil
|
||||||
|
}
|
||||||
|
|
||||||
if c.model_config.tokens != nil {
|
if c.model_config.tokens != nil {
|
||||||
C.free(unsafe.Pointer(c.model_config.tokens))
|
C.free(unsafe.Pointer(c.model_config.tokens))
|
||||||
c.model_config.tokens = nil
|
c.model_config.tokens = nil
|
||||||
|
|||||||
@@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl {
|
|||||||
SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
|
SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
|
||||||
SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
|
SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
|
||||||
SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
|
SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
|
||||||
SHERPA_ONNX_READ_META_DATA_STR(normalize_type_, "normalize_type");
|
SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
|
||||||
|
"normalize_type");
|
||||||
SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
|
SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
|
||||||
SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");
|
SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");
|
||||||
|
|
||||||
|
|||||||
@@ -299,6 +299,16 @@ type
|
|||||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
|
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Pascal-side settings of a non-streaming NeMo Canary model. }
TSherpaOnnxOfflineCanaryModelConfig = record
  { Encoder and decoder model files }
  Encoder: AnsiString;
  Decoder: AnsiString;
  { Source and target language codes, e.g. 'en' or 'de' }
  SrcLang: AnsiString;
  TgtLang: AnsiString;
  { Converted with Ord() to an integer flag for the C API.
    Presumably toggles punctuation and capitalisation; confirm. }
  UsePnc: Boolean;
  { Returns a printable description of all fields }
  function ToString: AnsiString;
  { Sets the default values of a newly created record }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
end;
|
||||||
|
|
||||||
TSherpaOnnxOfflineMoonshineModelConfig = record
|
TSherpaOnnxOfflineMoonshineModelConfig = record
|
||||||
Preprocessor: AnsiString;
|
Preprocessor: AnsiString;
|
||||||
Encoder: AnsiString;
|
Encoder: AnsiString;
|
||||||
@@ -352,6 +362,7 @@ type
|
|||||||
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
|
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
|
||||||
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
|
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
|
||||||
ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
|
ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
|
||||||
|
Canary: TSherpaOnnxOfflineCanaryModelConfig;
|
||||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
||||||
function ToString: AnsiString;
|
function ToString: AnsiString;
|
||||||
end;
|
end;
|
||||||
@@ -398,6 +409,7 @@ type
|
|||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
function CreateStream: TSherpaOnnxOfflineStream;
|
function CreateStream: TSherpaOnnxOfflineStream;
|
||||||
procedure Decode(Stream: TSherpaOnnxOfflineStream);
|
procedure Decode(Stream: TSherpaOnnxOfflineStream);
|
||||||
|
procedure SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
|
||||||
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
|
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
|
||||||
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
|
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
|
||||||
property GetHandle: Pointer Read Handle;
|
property GetHandle: Pointer Read Handle;
|
||||||
@@ -742,6 +754,13 @@ type
|
|||||||
Task: PAnsiChar;
|
Task: PAnsiChar;
|
||||||
TailPaddings: cint32;
|
TailPaddings: cint32;
|
||||||
end;
|
end;
|
||||||
|
{ C-API view of the Canary model settings, filled from
  TSherpaOnnxOfflineCanaryModelConfig: strings become PAnsiChar and the
  Boolean flag becomes a 32-bit integer. Keep the field order unchanged. }
SherpaOnnxOfflineCanaryModelConfig = record
  Encoder: PAnsiChar;
  Decoder: PAnsiChar;
  SrcLang: PAnsiChar;
  TgtLang: PAnsiChar;
  UsePnc: cint32;
end;
|
||||||
SherpaOnnxOfflineFireRedAsrModelConfig = record
|
SherpaOnnxOfflineFireRedAsrModelConfig = record
|
||||||
Encoder: PAnsiChar;
|
Encoder: PAnsiChar;
|
||||||
Decoder: PAnsiChar;
|
Decoder: PAnsiChar;
|
||||||
@@ -783,6 +802,7 @@ type
|
|||||||
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
|
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
|
||||||
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
|
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
|
||||||
ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
|
ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
|
||||||
|
Canary: SherpaOnnxOfflineCanaryModelConfig;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
SherpaOnnxOfflineRecognizerConfig = record
|
SherpaOnnxOfflineRecognizerConfig = record
|
||||||
@@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
|
|||||||
procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
|
procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
|
||||||
external SherpaOnnxLibName;
|
external SherpaOnnxLibName;
|
||||||
|
|
||||||
|
procedure SherpaOnnxOfflineRecognizerSetConfig(Recognizer: Pointer; Config: PSherpaOnnxOfflineRecognizerConfig); cdecl;
|
||||||
|
external SherpaOnnxLibName;
|
||||||
|
|
||||||
function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
|
function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
|
||||||
external SherpaOnnxLibName;
|
external SherpaOnnxLibName;
|
||||||
|
|
||||||
@@ -1564,6 +1587,19 @@ begin
|
|||||||
[Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
|
[Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Returns a one-line, human-readable dump of every field of the record. }
function TSherpaOnnxOfflineCanaryModelConfig.ToString: AnsiString;
begin
  Result := Format(
    'TSherpaOnnxOfflineCanaryModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'SrcLang := %s, ' +
    'TgtLang := %s, ' +
    'UsePnc := %s' +
    ')',
    [
      Self.Encoder,
      Self.Decoder,
      Self.SrcLang,
      Self.TgtLang,
      Self.UsePnc.ToString
    ]);
end;
|
||||||
|
|
||||||
function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
|
function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
|
||||||
begin
|
begin
|
||||||
Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
|
Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
|
||||||
@@ -1627,14 +1663,16 @@ begin
|
|||||||
'Moonshine := %s, ' +
|
'Moonshine := %s, ' +
|
||||||
'FireRedAsr := %s, ' +
|
'FireRedAsr := %s, ' +
|
||||||
'Dolphin := %s, ' +
|
'Dolphin := %s, ' +
|
||||||
'ZipformerCtc := %s' +
|
'ZipformerCtc := %s, ' +
|
||||||
|
'Canary := %s' +
|
||||||
')',
|
')',
|
||||||
[Self.Transducer.ToString, Self.Paraformer.ToString,
|
[Self.Transducer.ToString, Self.Paraformer.ToString,
|
||||||
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
|
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
|
||||||
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
|
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
|
||||||
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
|
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
|
||||||
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
|
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
|
||||||
Self.FireRedAsr.ToString, Self.Dolphin.ToString, Self.ZipformerCtc.ToString
|
Self.FireRedAsr.ToString, Self.Dolphin.ToString,
|
||||||
|
Self.ZipformerCtc.ToString, Self.Canary.ToString
|
||||||
]);
|
]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@@ -1660,7 +1698,7 @@ begin
|
|||||||
]);
|
]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
|
function ConvertOfflineRecognizerConfig(Config: TSherpaOnnxOfflineRecognizerConfig): SherpaOnnxOfflineRecognizerConfig;
|
||||||
var
|
var
|
||||||
C: SherpaOnnxOfflineRecognizerConfig;
|
C: SherpaOnnxOfflineRecognizerConfig;
|
||||||
begin
|
begin
|
||||||
@@ -1707,6 +1745,12 @@ begin
|
|||||||
C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
|
C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
|
||||||
C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);
|
C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);
|
||||||
|
|
||||||
|
C.ModelConfig.Canary.Encoder := PAnsiChar(Config.ModelConfig.Canary.Encoder);
|
||||||
|
C.ModelConfig.Canary.Decoder := PAnsiChar(Config.ModelConfig.Canary.Decoder);
|
||||||
|
C.ModelConfig.Canary.SrcLang := PAnsiChar(Config.ModelConfig.Canary.SrcLang);
|
||||||
|
C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
|
||||||
|
C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);
|
||||||
|
|
||||||
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
|
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
|
||||||
C.LMConfig.Scale := Config.LMConfig.Scale;
|
C.LMConfig.Scale := Config.LMConfig.Scale;
|
||||||
|
|
||||||
@@ -1722,10 +1766,27 @@ begin
|
|||||||
C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
|
C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
|
||||||
C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);
|
C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);
|
||||||
|
|
||||||
|
Result := C;
|
||||||
|
end;
|
||||||
|
|
||||||
|
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
|
||||||
|
var
|
||||||
|
C: SherpaOnnxOfflineRecognizerConfig;
|
||||||
|
begin
|
||||||
|
C := ConvertOfflineRecognizerConfig(Config);
|
||||||
Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
|
Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
|
||||||
Self._Config := Config;
|
Self._Config := Config;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Converts Config to its C-API representation and passes it to the native
  recognizer behind Self.Handle. }
procedure TSherpaOnnxOfflineRecognizer.SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  Native: SherpaOnnxOfflineRecognizerConfig;
begin
  Native := ConvertOfflineRecognizerConfig(Config);
  SherpaOnnxOfflineRecognizerSetConfig(Self.Handle, @Native);

  { Self._Config is deliberately not updated here, so the Config property
    keeps returning the configuration given to the constructor. }
end;
|
||||||
|
|
||||||
destructor TSherpaOnnxOfflineRecognizer.Destroy;
|
destructor TSherpaOnnxOfflineRecognizer.Destroy;
|
||||||
begin
|
begin
|
||||||
SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
|
SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
|
||||||
@@ -1912,6 +1973,13 @@ begin
|
|||||||
Dest.TailPaddings := -1;
|
Dest.TailPaddings := -1;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Default values of a new Canary configuration: English as both source and
  target language, and UsePnc switched on. Encoder and Decoder are not
  assigned here. }
class operator TSherpaOnnxOfflineCanaryModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
begin
  Dest.UsePnc := True;
  Dest.SrcLang := 'en';
  Dest.TgtLang := 'en';
end;
|
||||||
|
|
||||||
class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
|
class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
|
||||||
begin
|
begin
|
||||||
Dest.Scale := 1.0;
|
Dest.Scale := 1.0;
|
||||||
|
|||||||
Reference in New Issue
Block a user