Add FireRedAsr (non-streaming ASR) support to the Pascal API.

  * sherpa_onnx.pas: new FireRedAsr model-config records (Pascal-side and
    C-side), wired into the offline model config, its ToString, and the
    Pascal -> C config conversion.
  * pascal-api-examples/non-streaming-asr: new example program
    fire_red_asr.pas and its build/run script run-fire-red-asr.sh.
  * CI: the "Non Streaming ASR" step now also runs run-fire-red-asr.sh and
    is moved ahead of the "Speaker diarization" step.

diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml
index aed04e28..4f0fb013 100644
--- a/.github/workflows/pascal.yaml
+++ b/.github/workflows/pascal.yaml
@@ -125,6 +125,56 @@ jobs:
             cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
           fi
 
+      - name: Run Pascal test (Non Streaming ASR)
+        shell: bash
+        run: |
+          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
+          cd ./pascal-api-examples
+
+          pushd non-streaming-asr
+          ./run-zipformer-transducer.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-moonshine.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-fire-red-asr.sh
+          rm -rf sherpa-onnx-fire-red-asr*
+          echo "---"
+
+          ./run-whisper.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-nemo-transducer.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-nemo-ctc.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-sense-voice.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-telespeech-ctc.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ./run-paraformer.sh
+
+          ./run-paraformer-itn.sh
+
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          ls -lh
+          popd
+
       - name: Run Pascal test (Speaker diarization)
         shell: bash
         run: |
@@ -235,52 +285,6 @@ jobs:
           ls -lh
           popd
 
-      - name: Run Pascal test (Non Streaming ASR)
-        shell: bash
-        run: |
-          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
-
-          cd ./pascal-api-examples
-
-          pushd non-streaming-asr
-          ./run-zipformer-transducer.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-moonshine.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-whisper.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-nemo-transducer.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-nemo-ctc.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-sense-voice.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-telespeech-ctc.sh
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ./run-paraformer.sh
-
-          ./run-paraformer-itn.sh
-
-          rm -rf sherpa-onnx-*
-          echo "---"
-
-          ls -lh
-          popd
-
       - name: Run Pascal test (Streaming ASR)
         shell: bash
         run: |
diff --git a/pascal-api-examples/non-streaming-asr/fire_red_asr.pas b/pascal-api-examples/non-streaming-asr/fire_red_asr.pas
new file mode 100644
index 00000000..0f5e3354
--- /dev/null
+++ b/pascal-api-examples/non-streaming-asr/fire_red_asr.pas
@@ -0,0 +1,77 @@
+{ Copyright (c)  2025  Xiaomi Corporation }
+
+{
+This file shows how to use a non-streaming FireRedAsr AED model
+to decode files.
+
+You can download the model files from
+https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+}
+
+program fire_red_asr;
+
+{$mode objfpc}
+
+uses
+  sherpa_onnx,
+  DateUtils,
+  SysUtils;
+
+var
+  Wave: TSherpaOnnxWave;
+  WaveFilename: AnsiString;
+
+  Config: TSherpaOnnxOfflineRecognizerConfig;
+  Recognizer: TSherpaOnnxOfflineRecognizer;
+  Stream: TSherpaOnnxOfflineStream;
+  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
+
+  Start: TDateTime;
+  Stop: TDateTime;
+
+  Elapsed: Single;
+  Duration: Single;
+  RealTimeFactor: Single;
+begin
+  Initialize(Config);
+
+  Config.ModelConfig.FireRedAsr.Encoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx';
+  Config.ModelConfig.FireRedAsr.Decoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx';
+  Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt';
+  Config.ModelConfig.Provider := 'cpu';
+  Config.ModelConfig.NumThreads := 1;
+  Config.ModelConfig.Debug := False;
+
+  WaveFilename := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
+
+  Wave := SherpaOnnxReadWave(WaveFilename);
+
+  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
+  Stream := Recognizer.CreateStream();
+  Start := Now;
+
+  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
+  Recognizer.Decode(Stream);
+
+  RecognitionResult := Recognizer.GetResult(Stream);
+
+  Stop := Now;
+
+  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
+  Duration := Length(Wave.Samples) / Wave.SampleRate;
+  RealTimeFactor := Elapsed / Duration;
+
+  WriteLn(RecognitionResult.ToString);
+  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
+  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
+  WriteLn(Format('Wave duration %.3f s', [Duration]));
+  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
+
+  {Free resources to avoid memory leak.
+
+  Note: You don't need to invoke them for this simple script.
+  However, you have to invoke them in your own large/complex project.
+  }
+  FreeAndNil(Stream);
+  FreeAndNil(Recognizer);
+end.
diff --git a/pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh b/pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh
new file mode 100755
index 00000000..e2d8f50c
--- /dev/null
+++ b/pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
+
+echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+  mkdir -p ../../build
+  pushd ../../build
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    ..
+
+  cmake --build . --target install --config Release
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
+fi
+
+
+fpc \
+  -dSHERPA_ONNX_USE_SHARED_LIBS \
+  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
+  ./fire_red_asr.pas
+
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
+
+./fire_red_asr
diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas
index 3d57bf18..ea03ab0f 100644
--- a/sherpa-onnx/pascal-api/sherpa_onnx.pas
+++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas
@@ -288,6 +288,12 @@ type
     function ToString: AnsiString;
   end;
 
+  TSherpaOnnxOfflineFireRedAsrModelConfig = record
+    Encoder: AnsiString;
+    Decoder: AnsiString;
+    function ToString: AnsiString;
+  end;
+
   TSherpaOnnxOfflineTdnnModelConfig = record
     Model: AnsiString;
     function ToString: AnsiString;
@@ -324,6 +330,7 @@ type
     TeleSpeechCtc: AnsiString;
     SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
     Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
+    FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
     class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
     function ToString: AnsiString;
   end;
@@ -656,6 +663,10 @@ type
     Task: PAnsiChar;
     TailPaddings: cint32;
   end;
+  SherpaOnnxOfflineFireRedAsrModelConfig = record
+    Encoder: PAnsiChar;
+    Decoder: PAnsiChar;
+  end;
   SherpaOnnxOfflineMoonshineModelConfig = record
     Preprocessor: PAnsiChar;
     Encoder: PAnsiChar;
@@ -690,6 +701,7 @@ type
     TeleSpeechCtc: PAnsiChar;
     SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
     Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
+    FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
   end;
 
   SherpaOnnxOfflineRecognizerConfig = record
@@ -1382,6 +1394,14 @@ begin
     [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
 end;
 
+function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
+    'Encoder := %s, ' +
+    'Decoder := %s)',
+    [Self.Encoder, Self.Decoder]);
+end;
+
 function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
 begin
   Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' +
@@ -1434,13 +1454,15 @@ begin
     'BpeVocab := %s, ' +
     'TeleSpeechCtc := %s, ' +
     'SenseVoice := %s, ' +
-    'Moonshine := %s' +
+    'Moonshine := %s, ' +
+    'FireRedAsr := %s' +
     ')',
     [Self.Transducer.ToString, Self.Paraformer.ToString,
      Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
      Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
      Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
-     Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString
+     Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
+     Self.FireRedAsr.ToString
      ]);
 end;
 
@@ -1506,6 +1528,9 @@ begin
   C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
   C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
 
+  C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
+  C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
+
   C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
   C.LMConfig.Scale := Config.LMConfig.Scale;
 