diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml index 55285c3f..9ff7b229 100644 --- a/.github/workflows/pascal.yaml +++ b/.github/workflows/pascal.yaml @@ -39,7 +39,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, macos-13] + os: [ubuntu-latest, macos-latest, macos-13, windows-latest] steps: - uses: actions/checkout@v4 @@ -64,10 +64,19 @@ jobs: run: | brew install fpc # brew install --cask lazarus + # + - name: Install Free pascal compiler (windows) + if: matrix.os == 'windows-latest' + shell: bash + run: | + choco install lazarus + + ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/ - name: FPC info shell: bash run: | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH which fpc fpc -i @@ -87,6 +96,7 @@ jobs: cd build cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ -D BUILD_SHARED_LIBS=ON \ -D SHERPA_ONNX_ENABLE_BINARY=OFF \ -D CMAKE_BUILD_TYPE=Release \ @@ -98,15 +108,55 @@ jobs: export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" cd build - make -j2 sherpa-onnx-c-api + cmake --build . 
--target install --config Release - - name: Run Pascal test + ls -lh install/lib/ + + if [[ ${{ matrix.os }} == 'windows-latest' ]]; then + cp -v install/lib/*.dll ../pascal-api-examples/read-wav + cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr + + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr + fi + + - name: Run Pascal test (Read wav test) shell: bash run: | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH + cd ./pascal-api-examples - echo "----read-wav test-----" pushd read-wav ./run.sh + echo "---" + ls -lh + popd + + - name: Run Pascal test (Streaming ASR) + shell: bash + run: | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH + + cd ./pascal-api-examples + + pushd streaming-asr + ./run-zipformer-transducer.sh + rm -rf sherpa-onnx-* + echo "---" + + if [[ ${{ matrix.os }} != 'windows-latest' ]]; then + ./run-paraformer.sh + rm -rf sherpa-onnx-* + echo "---" + + ./run-zipformer-ctc.sh + echo "---" + + ./run-zipformer-ctc-hlg.sh + rm -rf sherpa-onnx-* + echo "---" + fi + ls -lh popd diff --git a/java-api-examples/StreamingDecodeFileCtcHLG.java b/java-api-examples/StreamingDecodeFileCtcHLG.java index 73a73873..cfa83a36 100644 --- a/java-api-examples/StreamingDecodeFileCtcHLG.java +++ b/java-api-examples/StreamingDecodeFileCtcHLG.java @@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG { .build(); OnlineCtcFstDecoderConfig ctcFstDecoderConfig = - OnlineCtcFstDecoderConfig.builder().setGraph("hlg").build(); + OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build(); OnlineRecognizerConfig config = OnlineRecognizerConfig.builder() diff --git a/pascal-api-examples/README.md b/pascal-api-examples/README.md new file mode 100644 index 00000000..4e4310d1 --- /dev/null +++ b/pascal-api-examples/README.md @@ -0,0 +1,9 @@ +# Introduction + +This directory contains examples for how to use the [Object 
Pascal](https://en.wikipedia.org/wiki/Object_Pascal) +APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). + +|Directory| Description| +|---------|------------| +|[read-wav](./read-wav)|It shows how to read a wave file.| +|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| diff --git a/pascal-api-examples/read-wav/run.sh b/pascal-api-examples/read-wav/run.sh index 60bb803c..f71fbe90 100755 --- a/pascal-api-examples/read-wav/run.sh +++ b/pascal-api-examples/read-wav/run.sh @@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" -if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/libsherpa-onnx-c-api.so ]]; then +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then mkdir -p ../../build pushd ../../build cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ -DSHERPA_ONNX_ENABLE_TESTS=OFF \ -DSHERPA_ONNX_ENABLE_CHECK=OFF \ @@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ .. - make -j4 sherpa-onnx-c-api - ls -lh lib + cmake --build . 
--target install --config Release popd fi @@ -29,10 +29,10 @@ fi fpc \ -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ - -Fl$SHERPA_ONNX_DIR/build/lib \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ ./main.pas -export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$LD_LIBRARY_PATH -export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$DYLD_LIBRARY_PATH +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH ./main diff --git a/pascal-api-examples/streaming-asr/.gitignore b/pascal-api-examples/streaming-asr/.gitignore new file mode 100644 index 00000000..9bfca7ff --- /dev/null +++ b/pascal-api-examples/streaming-asr/.gitignore @@ -0,0 +1,4 @@ +zipformer_transducer +paraformer +zipformer_ctc +zipformer_ctc_hlg diff --git a/pascal-api-examples/streaming-asr/README.md b/pascal-api-examples/streaming-asr/README.md new file mode 100644 index 00000000..cbd752ea --- /dev/null +++ b/pascal-api-examples/streaming-asr/README.md @@ -0,0 +1,11 @@ +# Introduction + +This folder contains examples about using sherpa-onnx's object pascal +APIs with streaming models for speech recognition. 
+ +|File|Description| +|----|-----------| +|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition| +|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition| +|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition| +|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a streaming Zipformer transducer model for speech recognition| diff --git a/pascal-api-examples/streaming-asr/paraformer.pas b/pascal-api-examples/streaming-asr/paraformer.pas new file mode 100644 index 00000000..a47b1d42 --- /dev/null +++ b/pascal-api-examples/streaming-asr/paraformer.pas @@ -0,0 +1,88 @@ +{ Copyright (c) 2024 Xiaomi Corporation } + +{ +This file shows how to use a streaming Paraformer model to decode files. + +You can download the model files from +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +} + +program paraformer; + +{$mode objfpc} + +uses + sherpa_onnx, + DateUtils, + SysUtils; + +var + Config: TSherpaOnnxOnlineRecognizerConfig; + Recognizer: TSherpaOnnxOnlineRecognizer; + Stream: TSherpaOnnxOnlineStream; + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; + Wave: TSherpaOnnxWave; + WaveFilename: AnsiString; + TailPaddings: array of Single; + + Start: TDateTime; + Stop: TDateTime; + + Elapsed: Single; + Duration: Single; + RealTimeFactor: Single; +begin + Initialize(Config); + + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download model files used in this file.} + Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx'; + Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx'; + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; + + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 
1; + Config.ModelConfig.Debug := False; + + WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav'; + + Wave := SherpaOnnxReadWave(WaveFilename); + + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); + + Start := Now; + + Stream := Recognizer.CreateStream(); + + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); + + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); + + Stream.InputFinished(); + + while Recognizer.IsReady(Stream) do + Recognizer.Decode(Stream); + + RecognitionResult := Recognizer.GetResult(Stream); + + Stop := Now; + + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; + Duration := Length(Wave.Samples) / Wave.SampleRate; + RealTimeFactor := Elapsed / Duration; + + WriteLn(RecognitionResult.ToString); + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); + WriteLn(Format('Elapsed %.3f s', [Elapsed])); + WriteLn(Format('Wave duration %.3f s', [Duration])); + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); + + {Free resources to avoid memory leak. + + Note: You don't need to invoke them for this simple script. + However, you have to invoke them in your own large/complex project. + } + FreeAndNil(Stream); + FreeAndNil(Recognizer); +end. diff --git a/pascal-api-examples/streaming-asr/run-paraformer.sh b/pascal-api-examples/streaming-asr/run-paraformer.sh new file mode 100755 index 00000000..aa6f056e --- /dev/null +++ b/pascal-api-examples/streaming-asr/run-paraformer.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) + +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" + +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! 
-f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then + mkdir -p ../../build + pushd ../../build + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + .. + + cmake --build . --target install --config Release + ls -lh lib + popd +fi + + +if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 + tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 + rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +fi + +fpc \ + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ + ./paraformer.pas + +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH + +./paraformer diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh new file mode 100755 index 00000000..e0d97508 --- /dev/null +++ b/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) + +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" + +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then + mkdir -p ../../build + pushd ../../build + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + .. 
+ + cmake --build . --target install --config Release + ls -lh lib + popd +fi + +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +fi + +fpc \ + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ + ./zipformer_ctc_hlg.pas + +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH + +./zipformer_ctc_hlg diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh new file mode 100755 index 00000000..4892c1ae --- /dev/null +++ b/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) + +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" + +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then + mkdir -p ../../build + pushd ../../build + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + .. + + cmake --build . --target install --config Release + ls -lh lib + popd +fi + +if [ ! 
-f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 +fi + +fpc \ + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ + ./zipformer_ctc.pas + +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH + +./zipformer_ctc diff --git a/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh b/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh new file mode 100755 index 00000000..ddcce781 --- /dev/null +++ b/pascal-api-examples/streaming-asr/run-zipformer-transducer.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) + +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" + +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then + mkdir -p ../../build + pushd ../../build + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + .. + + cmake --build . --target install --config Release + ls -lh lib + popd +fi + +if [ ! 
-f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +fi + + +fpc \ + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ + ./zipformer_transducer.pas + +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH + +./zipformer_transducer diff --git a/pascal-api-examples/streaming-asr/zipformer_ctc.pas b/pascal-api-examples/streaming-asr/zipformer_ctc.pas new file mode 100644 index 00000000..53a8497e --- /dev/null +++ b/pascal-api-examples/streaming-asr/zipformer_ctc.pas @@ -0,0 +1,87 @@ +{ Copyright (c) 2024 Xiaomi Corporation } + +{ +This file shows how to use a streaming Zipformer CTC model +to decode files. 
+ +You can download the model files from +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +} + +program zipformer_ctc; + +{$mode objfpc} + +uses + sherpa_onnx, + DateUtils, + SysUtils; + +var + Config: TSherpaOnnxOnlineRecognizerConfig; + Recognizer: TSherpaOnnxOnlineRecognizer; + Stream: TSherpaOnnxOnlineStream; + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; + Wave: TSherpaOnnxWave; + WaveFilename: AnsiString; + TailPaddings: array of Single; + + Start: TDateTime; + Stop: TDateTime; + + Elapsed: Single; + Duration: Single; + RealTimeFactor: Single; +begin + Initialize(Config); + + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download model files used in this file.} + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx'; + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt'; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 1; + Config.ModelConfig.Debug := False; + + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav'; + + Wave := SherpaOnnxReadWave(WaveFilename); + + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); + + Start := Now; + + Stream := Recognizer.CreateStream(); + + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); + + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); + + Stream.InputFinished(); + + while Recognizer.IsReady(Stream) do + Recognizer.Decode(Stream); + + RecognitionResult := Recognizer.GetResult(Stream); + + Stop := Now; + + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; + Duration := Length(Wave.Samples) / Wave.SampleRate; + RealTimeFactor := Elapsed / Duration; + + WriteLn(RecognitionResult.ToString); + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); + 
WriteLn(Format('Elapsed %.3f s', [Elapsed])); + WriteLn(Format('Wave duration %.3f s', [Duration])); + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); + + {Free resources to avoid memory leak. + + Note: You don't need to invoke them for this simple script. + However, you have to invoke them in your own large/complex project. + } + FreeAndNil(Stream); + FreeAndNil(Recognizer); +end. diff --git a/pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas b/pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas new file mode 100644 index 00000000..80e575e9 --- /dev/null +++ b/pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas @@ -0,0 +1,88 @@ +{ Copyright (c) 2024 Xiaomi Corporation } + +{ +This file shows how to use a streaming Zipformer CTC model +with HLG to decode files. + +You can download the model files from +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +} + +program zipformer_ctc_hlg; + +{$mode objfpc} + +uses + sherpa_onnx, + DateUtils, + SysUtils; + +var + Config: TSherpaOnnxOnlineRecognizerConfig; + Recognizer: TSherpaOnnxOnlineRecognizer; + Stream: TSherpaOnnxOnlineStream; + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; + Wave: TSherpaOnnxWave; + WaveFilename: AnsiString; + TailPaddings: array of Single; + + Start: TDateTime; + Stop: TDateTime; + + Elapsed: Single; + Duration: Single; + RealTimeFactor: Single; +begin + Initialize(Config); + + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download model files used in this file.} + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx'; + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt'; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 1; + Config.ModelConfig.Debug := True; + Config.CtcFstDecoderConfig.Graph := 
'./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst'; + + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav'; + + Wave := SherpaOnnxReadWave(WaveFilename); + + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); + + Start := Now; + + Stream := Recognizer.CreateStream(); + + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); + + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); + + Stream.InputFinished(); + + while Recognizer.IsReady(Stream) do + Recognizer.Decode(Stream); + + RecognitionResult := Recognizer.GetResult(Stream); + + Stop := Now; + + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; + Duration := Length(Wave.Samples) / Wave.SampleRate; + RealTimeFactor := Elapsed / Duration; + + WriteLn(RecognitionResult.ToString); + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); + WriteLn(Format('Elapsed %.3f s', [Elapsed])); + WriteLn(Format('Wave duration %.3f s', [Duration])); + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); + + {Free resources to avoid memory leak. + + Note: You don't need to invoke them for this simple script. + However, you have to invoke them in your own large/complex project. + } + FreeAndNil(Stream); + FreeAndNil(Recognizer); +end. diff --git a/pascal-api-examples/streaming-asr/zipformer_transducer.pas b/pascal-api-examples/streaming-asr/zipformer_transducer.pas new file mode 100644 index 00000000..303c000a --- /dev/null +++ b/pascal-api-examples/streaming-asr/zipformer_transducer.pas @@ -0,0 +1,89 @@ +{ Copyright (c) 2024 Xiaomi Corporation } + +{ +This file shows how to use a streaming Zipformer transducer +to decode files. 
+ +You can download the model files from +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +} + +program zipformer_transducer; + +{$mode objfpc} + +uses + sherpa_onnx, + DateUtils, + SysUtils; + +var + Config: TSherpaOnnxOnlineRecognizerConfig; + Recognizer: TSherpaOnnxOnlineRecognizer; + Stream: TSherpaOnnxOnlineStream; + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; + Wave: TSherpaOnnxWave; + WaveFilename: AnsiString; + TailPaddings: array of Single; + + Start: TDateTime; + Stop: TDateTime; + + Elapsed: Single; + Duration: Single; + RealTimeFactor: Single; +begin + Initialize(Config); + + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + to download model files used in this file.} + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx'; + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; + Config.ModelConfig.Provider := 'cpu'; + Config.ModelConfig.NumThreads := 1; + Config.ModelConfig.Debug := False; + + WaveFilename := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; + + Wave := SherpaOnnxReadWave(WaveFilename); + + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); + + Start := Now; + + Stream := Recognizer.CreateStream(); + + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); + + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); + + Stream.InputFinished(); + + while Recognizer.IsReady(Stream) do + Recognizer.Decode(Stream); + + RecognitionResult := Recognizer.GetResult(Stream); + + 
Stop := Now; + + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; + Duration := Length(Wave.Samples) / Wave.SampleRate; + RealTimeFactor := Elapsed / Duration; + + WriteLn(RecognitionResult.ToString); + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); + WriteLn(Format('Elapsed %.3f s', [Elapsed])); + WriteLn(Format('Wave duration %.3f s', [Duration])); + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); + + {Free resources to avoid memory leak. + + Note: You don't need to invoke them for this simple script. + However, you have to invoke them in your own large/complex project. + } + FreeAndNil(Stream); + FreeAndNil(Recognizer); +end. diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 9e9945f1..03106a88 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer( SHERPA_ONNX_OR(config->model_config.num_threads, 1); recognizer_config.model_config.provider_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu"); + + if (recognizer_config.model_config.provider_config.provider.empty()) { + recognizer_config.model_config.provider_config.provider = "cpu"; + } + recognizer_config.model_config.model_type = SHERPA_ONNX_OR(config->model_config.model_type, ""); recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0); recognizer_config.model_config.modeling_unit = SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); + + if (recognizer_config.model_config.modeling_unit.empty()) { + recognizer_config.model_config.modeling_unit = "cjkchar"; + } + recognizer_config.model_config.bpe_vocab = SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); + if (recognizer_config.decoding_method.empty()) { + recognizer_config.decoding_method = "greedy_search"; + } + 
recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); @@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig( SHERPA_ONNX_OR(config->model_config.debug, 0); recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu"); + if (recognizer_config.model_config.provider.empty()) { + recognizer_config.model_config.provider = "cpu"; + } + recognizer_config.model_config.model_type = SHERPA_ONNX_OR(config->model_config.model_type, ""); recognizer_config.model_config.modeling_unit = SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); + + if (recognizer_config.model_config.modeling_unit.empty()) { + recognizer_config.model_config.modeling_unit = "cjkchar"; + } + recognizer_config.model_config.bpe_vocab = SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); @@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter( SHERPA_ONNX_OR(config->model_config.num_threads, 1); spotter_config.model_config.provider_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu"); + if (spotter_config.model_config.provider_config.provider.empty()) { + spotter_config.model_config.provider_config.provider = "cpu"; + } + spotter_config.model_config.model_type = SHERPA_ONNX_OR(config->model_config.model_type, ""); spotter_config.model_config.debug = @@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000); vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); + if (vad_config.provider.empty()) { + vad_config.provider = "cpu"; + } + vad_config.debug = SHERPA_ONNX_OR(config->debug, false); if (vad_config.debug) { @@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); tts_config.model.debug = config->model.debug; 
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); + if (tts_config.model.provider.empty()) { + tts_config.model.provider = "cpu"; + } + tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); @@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification( slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); slid_config.debug = config->debug; slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); + if (slid_config.provider.empty()) { + slid_config.provider = "cpu"; + } if (slid_config.debug) { SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); @@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); c.debug = SHERPA_ONNX_OR(config->debug, 0); c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); + if (c.provider.empty()) { + c.provider = "cpu"; + } if (config->debug) { SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); @@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); ac.model.debug = config->model.debug; ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); + if (ac.model.provider.empty()) { + ac.model.provider = "cpu"; + } + ac.labels = SHERPA_ONNX_OR(config->labels, ""); ac.top_k = SHERPA_ONNX_OR(config->top_k, 5); @@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation( c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); c.model.debug = config->model.debug; c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); + if (c.model.provider.empty()) { + c.model.provider = "cpu"; + } if (c.model.debug) { SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas 
b/sherpa-onnx/pascal-api/sherpa_onnx.pas
index 62b1c3a8..a878b45e 100644
--- a/sherpa-onnx/pascal-api/sherpa_onnx.pas
+++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas
@@ -4,6 +4,9 @@ unit sherpa_onnx;
 
 {$mode objfpc}
 
+{$modeSwitch advancedRecords} { to support records with methods }
+(* {$LongStrings ON} *)
+
 interface
 
 type
@@ -12,15 +15,120 @@ type
     SampleRate: Integer;
   end;
 
+  TSherpaOnnxOnlineTransducerModelConfig = record
+    Encoder: AnsiString;
+    Decoder: AnsiString;
+    Joiner: AnsiString;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOnlineParaformerModelConfig = record
+    Encoder: AnsiString;
+    Decoder: AnsiString;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOnlineZipformer2CtcModelConfig = record
+    Model: AnsiString;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOnlineModelConfig = record
+    Transducer: TSherpaOnnxOnlineTransducerModelConfig;
+    Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
+    Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
+    Tokens: AnsiString;
+    NumThreads: Integer;
+    Provider: AnsiString;
+    Debug: Boolean;
+    ModelType: AnsiString;
+    ModelingUnit: AnsiString;
+    BpeVocab: AnsiString;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxFeatureConfig = record
+    SampleRate: Integer;
+    FeatureDim: Integer;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOnlineCtcFstDecoderConfig = record
+    Graph: AnsiString;
+    MaxActive: Integer;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOnlineRecognizerConfig = record
+    FeatConfig: TSherpaOnnxFeatureConfig;
+    ModelConfig: TSherpaOnnxOnlineModelConfig;
+    DecodingMethod: AnsiString;
+    MaxActivePaths: Integer;
+    EnableEndpoint: Boolean;
+    Rule1MinTrailingSilence: Single;
+    Rule2MinTrailingSilence: Single;
+    Rule3MinUtteranceLength: Single;
+    HotwordsFile: AnsiString;
+    HotwordsScore: Single;
+    CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
+    RuleFsts: AnsiString;
+    RuleFars: AnsiString;
+    BlankPenalty: Single;
+    function ToString: AnsiString;
+  end;
+
+  { Recognition result decoded from the JSON produced by the native library. }
+  TSherpaOnnxOnlineRecognizerResult = record
+    Text: AnsiString;
+    Tokens: array of AnsiString;
+    Timestamps: array of Single;
+    function ToString: AnsiString;
+  end;
+
+  { Wraps a native stream handle; Destroy releases it. }
+  TSherpaOnnxOnlineStream = class
+  private
+    Handle: Pointer;
+  public
+    constructor Create(P: Pointer);
+    destructor Destroy; override;
+    procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
+    procedure InputFinished;
+  end;
+
+  { Wraps a native recognizer handle; Destroy releases it. }
+  TSherpaOnnxOnlineRecognizer = class
+  private
+    Handle: Pointer;
+  public
+    constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
+    destructor Destroy; override;
+
+    function CreateStream: TSherpaOnnxOnlineStream; overload;
+    function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;
+    function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
+    procedure Decode(Stream: TSherpaOnnxOnlineStream);
+    procedure Reset(Stream: TSherpaOnnxOnlineStream);
+    function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
+    function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
+  end;
+
 { It supports reading a single channel wave with 16-bit encoded samples.
   Samples are normalized to the range [-1, 1].
 }
-function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave;
+function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
 
 implementation
 
 uses
-  ctypes;
+  ctypes,
+  fpjson,
+  { See
+    - https://wiki.freepascal.org/fcl-json
+    - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
+  }
+  jsonparser,
+  SysUtils;
 
 const
   {See https://www.freepascal.org/docs-html/prog/progap7.html}
@@ -47,31 +155,398 @@ type
 
   PSherpaOnnxWave = ^SherpaOnnxWave;
 
+  SherpaOnnxOnlineTransducerModelConfig = record
+    Encoder: PAnsiChar;
+    Decoder: PAnsiChar;
+    Joiner: PAnsiChar;
+  end;
+  SherpaOnnxOnlineParaformerModelConfig = record
+    Encoder: PAnsiChar;
+    Decoder: PAnsiChar;
+  end;
+  SherpaOnnxOnlineZipformer2CtcModelConfig = record
+    Model: PAnsiChar;
+  end;
+
+  SherpaOnnxOnlineModelConfig = record
+    Transducer: SherpaOnnxOnlineTransducerModelConfig;
+    Paraformer: SherpaOnnxOnlineParaformerModelConfig;
+    Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
+    Tokens: PAnsiChar;
+    NumThreads: cint32;
+    Provider: PAnsiChar;
+    Debug: cint32;
+    ModelType: PAnsiChar;
+    ModelingUnit: PAnsiChar;
+    BpeVocab: PAnsiChar;
+  end;
+  SherpaOnnxFeatureConfig = record
+    SampleRate: cint32;
+    FeatureDim: cint32;
+  end;
+  SherpaOnnxOnlineCtcFstDecoderConfig = record
+    Graph: PAnsiChar;
+    MaxActive: cint32;
+  end;
+  SherpaOnnxOnlineRecognizerConfig = record
+    FeatConfig: SherpaOnnxFeatureConfig;
+    ModelConfig: SherpaOnnxOnlineModelConfig;
+    DecodingMethod: PAnsiChar;
+    MaxActivePaths: cint32;
+    EnableEndpoint: cint32;
+    Rule1MinTrailingSilence: Single;
+    Rule2MinTrailingSilence: Single;
+    Rule3MinUtteranceLength: Single;
+    HotwordsFile: PAnsiChar;
+    HotwordsScore: Single;
+    CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
+    RuleFsts: PAnsiChar;
+    RuleFars: PAnsiChar;
+    BlankPenalty: Single;
+  end;
+
+  PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
+
+function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxDestroyOnlineStream(Stream: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
+  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
+  external SherpaOnnxLibName;
+
 function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
   external SherpaOnnxLibName name 'SherpaOnnxReadWave';
 
 procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
   external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
 
-function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave;
+function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
 var
-  AnsiFilename: AnsiString;
   PFilename: PAnsiChar;
   PWave: PSherpaOnnxWave;
   I: Integer;
 begin
-  AnsiFilename := Filename;
-  PFilename := PAnsiChar(AnsiFilename);
+  PFilename := PAnsiChar(Filename);
   PWave := SherpaOnnxReadWaveWrapper(PFilename);
 
+  Result.Samples := nil;
+  Result.SampleRate := 0;
+
+  { A nil PWave is treated as a failed read: return an empty wave. }
+  if PWave = nil then
+    Exit;
+
   SetLength(Result.Samples, PWave^.NumSamples);
 
   Result.SampleRate := PWave^.SampleRate;
 
   for I := Low(Result.Samples) to High(Result.Samples) do
-    Result.Samples[i] := PWave^.Samples[i];
+    Result.Samples[I] := PWave^.Samples[I];
 
   SherpaOnnxFreeWaveWrapper(PWave);
 end;
 
+function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)',
+    [Self.Encoder, Self.Decoder, Self.Joiner]);
+end;
+
+function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)',
+    [Self.Encoder, Self.Decoder]);
+end;
+
+function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)',
+    [Self.Model]);
+end;
+
+function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
+    'Paraformer := %s, ' +
+    'Zipformer2Ctc := %s, ' +
+    'Tokens := %s, ' +
+    'NumThreads := %d, ' +
+    'Provider := %s, ' +
+    'Debug := %s, ' +
+    'ModelType := %s, ' +
+    'ModelingUnit := %s, ' +
+    'BpeVocab := %s)'
+    ,
+    [Self.Transducer.ToString, Self.Paraformer.ToString,
+     Self.Zipformer2Ctc.ToString, Self.Tokens,
+     Self.NumThreads, Self.Provider, Self.Debug.ToString,
+     Self.ModelType, Self.ModelingUnit, Self.BpeVocab
+    ]);
+end;
+
+function TSherpaOnnxFeatureConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)',
+    [Self.SampleRate, Self.FeatureDim]);
+end;
+
+function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)',
+    [Self.Graph, Self.MaxActive]);
+end;
+
+function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
+    'ModelConfig := %s, ' +
+    'DecodingMethod := %s, ' +
+    'MaxActivePaths := %d, ' +
+    'EnableEndpoint := %s, ' +
+    'Rule1MinTrailingSilence := %.1f, ' +
+    'Rule2MinTrailingSilence := %.1f, ' +
+    'Rule3MinUtteranceLength := %.1f, ' +
+    'HotwordsFile := %s, ' +
+    'HotwordsScore := %.1f, ' +
+    'CtcFstDecoderConfig := %s, ' +
+    'RuleFsts := %s, ' +
+    'RuleFars := %s, ' +
+    'BlankPenalty := %.1f' +
+    ')'
+    ,
+    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
+     Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
+     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
+     Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
+     Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
+     Self.BlankPenalty
+    ]);
+end;
+
+function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
+var
+  TokensStr: AnsiString;
+  S: AnsiString;
+  TimestampStr: AnsiString;
+  T: Single;
+  Sep: AnsiString;
+begin
+  TokensStr := '[';
+  Sep := '';
+  for S in Self.Tokens do
+  begin
+    TokensStr := TokensStr + Sep + S;
+    Sep := ', ';
+  end;
+  TokensStr := TokensStr + ']';
+
+  TimestampStr := '[';
+  Sep := '';
+  for T in Self.Timestamps do
+  begin
+    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
+    Sep := ', ';
+  end;
+  TimestampStr := TimestampStr + ']';
+
+  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
+    'Tokens := %s, ' +
+    'Timestamps := %s' +
+    ')',
+    [Self.Text, TokensStr, TimestampStr]);
+end;
+
+constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
+var
+  C: SherpaOnnxOnlineRecognizerConfig;
+begin
+  Initialize(C);
+
+  { The PAnsiChar casts below only borrow the strings held by Config,
+    which stays alive until this constructor returns. }
+  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
+  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;
+
+  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
+  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
+  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
+
+  C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
+  C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);
+
+  C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
+
+  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
+  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
+  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
+  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
+  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
+  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
+  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
+
+  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
+  C.MaxActivePaths := Config.MaxActivePaths;
+  C.EnableEndpoint := Ord(Config.EnableEndpoint);
+  C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
+  C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
+  C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
+  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
+  C.HotwordsScore := Config.HotwordsScore;
+  C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
+  C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
+  C.RuleFsts := PAnsiChar(Config.RuleFsts);
+  C.RuleFars := PAnsiChar(Config.RuleFars);
+  C.BlankPenalty := Config.BlankPenalty;
+
+  Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
+end;
+
+destructor TSherpaOnnxOnlineRecognizer.Destroy;
+begin
+  SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
+  Self.Handle := nil;
+  inherited Destroy;
+end;
+
+function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
+var
+  Stream: Pointer;
+begin
+  Stream := SherpaOnnxCreateOnlineStream(Self.Handle);
+  Result := TSherpaOnnxOnlineStream.Create(Stream);
+end;
+
+function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
+var
+  Stream: Pointer;
+begin
+  Stream := SherpaOnnxCreateOnlineStreamWithHotwords(Self.Handle, PAnsiChar(Hotwords));
+  Result := TSherpaOnnxOnlineStream.Create(Stream);
+end;
+
+function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
+begin
+  Result := SherpaOnnxIsOnlineStreamReady(Self.Handle, Stream.Handle) = 1;
+end;
+
+procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
+begin
+  SherpaOnnxDecodeOnlineStream(Self.Handle, Stream.Handle);
+end;
+
+procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
+begin
+  SherpaOnnxOnlineStreamReset(Self.Handle, Stream.Handle);
+end;
+
+function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
+begin
+  Result := SherpaOnnxOnlineStreamIsEndpoint(Self.Handle, Stream.Handle) = 1;
+end;
+
+function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
+var
+  pJson: PAnsiChar;
+  JsonData: TJSONData;
+  JsonObject : TJSONObject;
+  JsonEnum: TJSONEnum;
+  I: Integer;
+begin
+  pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);
+
+  {
+    - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
+    - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
+    - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
+    - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html
+  }
+
+  JsonData := nil;
+  try
+    JsonData := GetJSON(AnsiString(pJson), False);
+
+    JsonObject := JsonData as TJSONObject;
+
+    Result.Text := JsonObject.Strings['text'];
+
+    SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
+
+    I := 0;
+    for JsonEnum in JsonObject.Arrays['tokens'] do
+    begin
+      Result.Tokens[I] := JsonEnum.Value.AsString;
+      Inc(I);
+    end;
+
+    SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
+    I := 0;
+    for JsonEnum in JsonObject.Arrays['timestamps'] do
+    begin
+      Result.Timestamps[I] := JsonEnum.Value.AsFloat;
+      Inc(I);
+    end;
+  finally
+    { Release both the parsed JSON tree and the native JSON buffer, even on error. }
+    JsonData.Free;
+    SherpaOnnxDestroyOnlineStreamResultJson(pJson);
+  end;
+end;
+
+
+constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
+begin
+  Self.Handle := P;
+end;
+
+destructor TSherpaOnnxOnlineStream.Destroy;
+begin
+  SherpaOnnxDestroyOnlineStream(Self.Handle);
+  Self.Handle := nil;
+  inherited Destroy;
+end;
+
+procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
+begin
+  SherpaOnnxOnlineStreamAcceptWaveform(Self.Handle, SampleRate,
+    pcfloat(Samples), Length(Samples));
+end;
+
+procedure TSherpaOnnxOnlineStream.InputFinished;
+begin
+  SherpaOnnxOnlineStreamInputFinished(Self.Handle);
+end;
+
 end.