From b5d89d7bcb549f5802a1749cbdf88d5964d194d2 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Mon, 17 Feb 2025 15:17:08 +0800
Subject: [PATCH] Add Dart API for FireRedAsr AED Model (#1877)

---
Reviewer notes (text between "---" and the first "diff --git" is not part of
the commit message):
- Adds OfflineFireRedAsrModelConfig (encoder, decoder) and the matching FFI
  struct SherpaOnnxOfflineFireRedAsrModelConfig, and wires both into the
  offline model config.
- OfflineRecognizer copies the two paths with toNativeUtf8() and releases
  them with calloc.free().
- Adds a runnable example (bin/fire-red-asr.dart, run-fire-red-asr.sh) and
  calls it from .github/scripts/test-dart.sh.

 .github/scripts/test-dart.sh                  |  4 ++
 .../non-streaming-asr/bin/fire-red-asr.dart   | 58 +++++++++++++++++++
 .../non-streaming-asr/run-fire-red-asr.sh     | 19 ++++++
 .../offline-decode-files/run-fire-red-asr.sh  | 12 ++--
 .../lib/src/offline_recognizer.dart           | 24 +++++++-
 .../lib/src/sherpa_onnx_bindings.dart         |  6 ++
 6 files changed, 116 insertions(+), 7 deletions(-)
 create mode 100644 dart-api-examples/non-streaming-asr/bin/fire-red-asr.dart
 create mode 100755 dart-api-examples/non-streaming-asr/run-fire-red-asr.sh

diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh
index 27199ae9..f579e34f 100755
--- a/.github/scripts/test-dart.sh
+++ b/.github/scripts/test-dart.sh
@@ -92,6 +92,10 @@ popd
 
 pushd non-streaming-asr
 
+echo '----------FireRedAsr----------'
+./run-fire-red-asr.sh
+rm -rf sherpa-onnx-fire-red-asr-*
+
 echo '----------SenseVoice----------'
 ./run-sense-voice.sh
 rm -rf sherpa-onnx-*
diff --git a/dart-api-examples/non-streaming-asr/bin/fire-red-asr.dart b/dart-api-examples/non-streaming-asr/bin/fire-red-asr.dart
new file mode 100644
index 00000000..69ad0c70
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/bin/fire-red-asr.dart
@@ -0,0 +1,58 @@
+// Copyright (c) 2025 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('encoder', help: 'Path to the FireRedAsr encoder model')
+    ..addOption('decoder', help: 'Path to FireRedAsr decoder model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['encoder'] == null ||
+      res['decoder'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  final encoder = res['encoder'] as String;
+  final decoder = res['decoder'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final fireRedAsr = sherpa_onnx.OfflineFireRedAsrModelConfig(
+    encoder: encoder,
+    decoder: decoder,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    fireRedAsr: fireRedAsr,
+    tokens: tokens,
+    debug: false,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  final stream = recognizer.createStream();
+
+  stream.acceptWaveform(
+      samples: waveData.samples, sampleRate: waveData.sampleRate);
+  recognizer.decode(stream);
+
+  final result = recognizer.getResult(stream);
+  print(result.text);
+
+  stream.free();
+  recognizer.free();
+}
diff --git a/dart-api-examples/non-streaming-asr/run-fire-red-asr.sh b/dart-api-examples/non-streaming-asr/run-fire-red-asr.sh
new file mode 100755
index 00000000..f8a0f32a
--- /dev/null
+++ b/dart-api-examples/non-streaming-asr/run-fire-red-asr.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
+fi
+
+dart pub get
+
+dart run \
+  ./bin/fire-red-asr.dart \
+  --encoder ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx \
+  --decoder ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx \
+  --tokens ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt \
+  --input-wav ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav
diff --git a/dotnet-examples/offline-decode-files/run-fire-red-asr.sh b/dotnet-examples/offline-decode-files/run-fire-red-asr.sh
index 5156f618..7cce8b0d 100755
--- a/dotnet-examples/offline-decode-files/run-fire-red-asr.sh
+++ b/dotnet-examples/offline-decode-files/run-fire-red-asr.sh
@@ -2,12 +2,12 @@
 
 set -ex
 
- if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
-   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
-   tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
-   rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
-   ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
- fi
+if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
+fi
 
 dotnet run \
   --num-threads=2 \
diff --git a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
index 01bceccc..e7a68845 100644
--- a/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
+++ b/flutter/sherpa_onnx/lib/src/offline_recognizer.dart
@@ -68,6 +68,20 @@ class OfflineWhisperModelConfig {
   final int tailPaddings;
 }
 
+class OfflineFireRedAsrModelConfig {
+  const OfflineFireRedAsrModelConfig(
+      {this.encoder = '',
+      this.decoder = ''});
+
+  @override
+  String toString() {
+    return 'OfflineFireRedAsrModelConfig(encoder: $encoder, decoder: $decoder)';
+  }
+
+  final String encoder;
+  final String decoder;
+}
+
 class OfflineMoonshineModelConfig {
   const OfflineMoonshineModelConfig(
       {this.preprocessor = '',
@@ -135,6 +149,7 @@ class OfflineModelConfig {
     this.tdnn = const OfflineTdnnModelConfig(),
     this.senseVoice = const OfflineSenseVoiceModelConfig(),
     this.moonshine = const OfflineMoonshineModelConfig(),
+    this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
     required this.tokens,
     this.numThreads = 1,
     this.debug = true,
@@ -147,7 +162,7 @@ class OfflineModelConfig {
 
   @override
   String toString() {
-    return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
+    return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
   }
 
   final OfflineTransducerModelConfig transducer;
@@ -157,6 +172,7 @@ class OfflineModelConfig {
   final OfflineTdnnModelConfig tdnn;
   final OfflineSenseVoiceModelConfig senseVoice;
   final OfflineMoonshineModelConfig moonshine;
+  final OfflineFireRedAsrModelConfig fireRedAsr;
 
   final String tokens;
   final int numThreads;
@@ -288,6 +304,10 @@ class OfflineRecognizer {
     c.ref.model.moonshine.cachedDecoder =
         config.model.moonshine.cachedDecoder.toNativeUtf8();
 
+    // FireRedAsr
+    c.ref.model.fireRedAsr.encoder = config.model.fireRedAsr.encoder.toNativeUtf8();
+    c.ref.model.fireRedAsr.decoder = config.model.fireRedAsr.decoder.toNativeUtf8();
+
     c.ref.model.tokens = config.model.tokens.toNativeUtf8();
 
     c.ref.model.numThreads = config.model.numThreads;
@@ -325,6 +345,8 @@ class OfflineRecognizer {
     calloc.free(c.ref.model.modelType);
     calloc.free(c.ref.model.provider);
     calloc.free(c.ref.model.tokens);
+    calloc.free(c.ref.model.fireRedAsr.decoder);
+    calloc.free(c.ref.model.fireRedAsr.encoder);
     calloc.free(c.ref.model.moonshine.cachedDecoder);
     calloc.free(c.ref.model.moonshine.uncachedDecoder);
     calloc.free(c.ref.model.moonshine.encoder);
diff --git a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
index d59fb053..e3d9906f 100644
--- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
+++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
@@ -248,6 +248,11 @@ final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
   external Pointer<Utf8> cachedDecoder;
 }
 
+final class SherpaOnnxOfflineFireRedAsrModelConfig extends Struct {
+  external Pointer<Utf8> encoder;
+  external Pointer<Utf8> decoder;
+}
+
 final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
   external Pointer<Utf8> model;
 }
@@ -291,6 +296,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
 
   external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
   external SherpaOnnxOfflineMoonshineModelConfig moonshine;
+  external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
 }
 
 final class SherpaOnnxOfflineRecognizerConfig extends Struct {