Pascal API for streaming ASR (#1246)
This commit is contained in:
58
.github/workflows/pascal.yaml
vendored
58
.github/workflows/pascal.yaml
vendored
@@ -39,7 +39,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, macos-13]
|
os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
@@ -64,10 +64,19 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
brew install fpc
|
brew install fpc
|
||||||
# brew install --cask lazarus
|
# brew install --cask lazarus
|
||||||
|
#
|
||||||
|
- name: Install Free pascal compiler (windows)
|
||||||
|
if: matrix.os == 'windows-latest'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
choco install lazarus
|
||||||
|
|
||||||
|
ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/
|
||||||
|
|
||||||
- name: FPC info
|
- name: FPC info
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
|
||||||
which fpc
|
which fpc
|
||||||
fpc -i
|
fpc -i
|
||||||
|
|
||||||
@@ -87,6 +96,7 @@ jobs:
|
|||||||
cd build
|
cd build
|
||||||
|
|
||||||
cmake \
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
-D BUILD_SHARED_LIBS=ON \
|
-D BUILD_SHARED_LIBS=ON \
|
||||||
-D SHERPA_ONNX_ENABLE_BINARY=OFF \
|
-D SHERPA_ONNX_ENABLE_BINARY=OFF \
|
||||||
-D CMAKE_BUILD_TYPE=Release \
|
-D CMAKE_BUILD_TYPE=Release \
|
||||||
@@ -98,15 +108,55 @@ jobs:
|
|||||||
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
|
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
|
||||||
|
|
||||||
cd build
|
cd build
|
||||||
make -j2 sherpa-onnx-c-api
|
cmake --build . --target install --config Release
|
||||||
|
|
||||||
- name: Run Pascal test
|
ls -lh install/lib/
|
||||||
|
|
||||||
|
if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
|
||||||
|
cp -v install/lib/*.dll ../pascal-api-examples/read-wav
|
||||||
|
cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
|
||||||
|
|
||||||
|
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
|
||||||
|
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Run Pascal test (Read wav test)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
|
||||||
|
|
||||||
cd ./pascal-api-examples
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
echo "----read-wav test-----"
|
|
||||||
pushd read-wav
|
pushd read-wav
|
||||||
./run.sh
|
./run.sh
|
||||||
|
echo "---"
|
||||||
|
ls -lh
|
||||||
|
popd
|
||||||
|
|
||||||
|
- name: Run Pascal test (Streaming ASR)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
|
||||||
|
|
||||||
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
|
pushd streaming-asr
|
||||||
|
./run-zipformer-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
|
||||||
|
./run-paraformer.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-zipformer-ctc.sh
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-zipformer-ctc-hlg.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
fi
|
||||||
|
|
||||||
ls -lh
|
ls -lh
|
||||||
popd
|
popd
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG {
|
|||||||
.build();
|
.build();
|
||||||
|
|
||||||
OnlineCtcFstDecoderConfig ctcFstDecoderConfig =
|
OnlineCtcFstDecoderConfig ctcFstDecoderConfig =
|
||||||
OnlineCtcFstDecoderConfig.builder().setGraph("hlg").build();
|
OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build();
|
||||||
|
|
||||||
OnlineRecognizerConfig config =
|
OnlineRecognizerConfig config =
|
||||||
OnlineRecognizerConfig.builder()
|
OnlineRecognizerConfig.builder()
|
||||||
|
|||||||
9
pascal-api-examples/README.md
Normal file
9
pascal-api-examples/README.md
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Introduction
|
||||||
|
|
||||||
|
This directory contains examples of how to use the [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal)
|
||||||
|
APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|
||||||
|
|
||||||
|
|Directory| Description|
|
||||||
|
|---------|------------|
|
||||||
|
|[read-wav](./read-wav)|It shows how to read a wave file.|
|
||||||
|
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
|
||||||
@@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
|||||||
|
|
||||||
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/libsherpa-onnx-c-api.so ]]; then
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
mkdir -p ../../build
|
mkdir -p ../../build
|
||||||
pushd ../../build
|
pushd ../../build
|
||||||
cmake \
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
@@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l
|
|||||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
..
|
..
|
||||||
|
|
||||||
make -j4 sherpa-onnx-c-api
|
cmake --build . --target install --config Release
|
||||||
ls -lh lib
|
|
||||||
popd
|
popd
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -29,10 +29,10 @@ fi
|
|||||||
|
|
||||||
fpc \
|
fpc \
|
||||||
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
-Fl$SHERPA_ONNX_DIR/build/lib \
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
./main.pas
|
./main.pas
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$DYLD_LIBRARY_PATH
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
./main
|
./main
|
||||||
|
|||||||
4
pascal-api-examples/streaming-asr/.gitignore
vendored
Normal file
4
pascal-api-examples/streaming-asr/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
zipformer_transducer
|
||||||
|
paraformer
|
||||||
|
zipformer_ctc
|
||||||
|
zipformer_ctc_hlg
|
||||||
11
pascal-api-examples/streaming-asr/README.md
Normal file
11
pascal-api-examples/streaming-asr/README.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Introduction
|
||||||
|
|
||||||
|
This folder contains examples of using sherpa-onnx's Object Pascal
|
||||||
|
APIs with streaming models for speech recognition.
|
||||||
|
|
||||||
|
|File|Description|
|
||||||
|
|----|-----------|
|
||||||
|
|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition|
|
||||||
|
|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
|
||||||
|
|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
|
||||||
|
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a streaming Zipformer transducer model for speech recognition|
|
||||||
88
pascal-api-examples/streaming-asr/paraformer.pas
Normal file
88
pascal-api-examples/streaming-asr/paraformer.pas
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a streaming Paraformer model to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program paraformer;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Config: TSherpaOnnxOnlineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOnlineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOnlineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
TailPaddings: array of Single;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Initialize(Config);
|
||||||
|
|
||||||
|
{Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
to download model files used in this file.}
|
||||||
|
Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
|
||||||
|
Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
|
||||||
|
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
|
||||||
|
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
|
||||||
|
SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
|
||||||
|
Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
|
||||||
|
|
||||||
|
Stream.InputFinished();
|
||||||
|
|
||||||
|
while Recognizer.IsReady(Stream) do
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leak.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
42
pascal-api-examples/streaming-asr/run-paraformer.sh
Executable file
42
pascal-api-examples/streaming-asr/run-paraformer.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./paraformer.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./paraformer
|
||||||
41
pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
Executable file
41
pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./zipformer_ctc_hlg.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./zipformer_ctc_hlg
|
||||||
41
pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
Executable file
41
pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./zipformer_ctc.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./zipformer_ctc
|
||||||
42
pascal-api-examples/streaming-asr/run-zipformer-transducer.sh
Executable file
42
pascal-api-examples/streaming-asr/run-zipformer-transducer.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./zipformer_transducer.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./zipformer_transducer
|
||||||
87
pascal-api-examples/streaming-asr/zipformer_ctc.pas
Normal file
87
pascal-api-examples/streaming-asr/zipformer_ctc.pas
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a streaming Zipformer CTC model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program zipformer_ctc;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Config: TSherpaOnnxOnlineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOnlineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOnlineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
TailPaddings: array of Single;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Initialize(Config);
|
||||||
|
|
||||||
|
{Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
to download model files used in this file.}
|
||||||
|
Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
|
||||||
|
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
|
||||||
|
SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
|
||||||
|
Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
|
||||||
|
|
||||||
|
Stream.InputFinished();
|
||||||
|
|
||||||
|
while Recognizer.IsReady(Stream) do
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leak.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
88
pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
Normal file
88
pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a streaming Zipformer CTC model
|
||||||
|
with HLG to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program zipformer_ctc_hlg;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Config: TSherpaOnnxOnlineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOnlineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOnlineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
TailPaddings: array of Single;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Initialize(Config);
|
||||||
|
|
||||||
|
{Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
to download model files used in this file.}
|
||||||
|
Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := True;
|
||||||
|
Config.CtcFstDecoderConfig.Graph := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst';
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
|
||||||
|
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
|
||||||
|
SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
|
||||||
|
Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
|
||||||
|
|
||||||
|
Stream.InputFinished();
|
||||||
|
|
||||||
|
while Recognizer.IsReady(Stream) do
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leak.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
89
pascal-api-examples/streaming-asr/zipformer_transducer.pas
Normal file
89
pascal-api-examples/streaming-asr/zipformer_transducer.pas
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a streaming Zipformer transducer
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program zipformer_transducer;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Config: TSherpaOnnxOnlineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOnlineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOnlineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
TailPaddings: array of Single;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Initialize(Config);
|
||||||
|
|
||||||
|
{Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
to download model files used in this file.}
|
||||||
|
Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
|
||||||
|
Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
|
||||||
|
Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
|
||||||
|
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
|
||||||
|
SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
|
||||||
|
Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
|
||||||
|
|
||||||
|
Stream.InputFinished();
|
||||||
|
|
||||||
|
while Recognizer.IsReady(Stream) do
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leak.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
@@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer(
|
|||||||
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||||
recognizer_config.model_config.provider_config.provider =
|
recognizer_config.model_config.provider_config.provider =
|
||||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||||
|
|
||||||
|
if (recognizer_config.model_config.provider_config.provider.empty()) {
|
||||||
|
recognizer_config.model_config.provider_config.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
recognizer_config.model_config.model_type =
|
recognizer_config.model_config.model_type =
|
||||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||||
recognizer_config.model_config.debug =
|
recognizer_config.model_config.debug =
|
||||||
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||||
recognizer_config.model_config.modeling_unit =
|
recognizer_config.model_config.modeling_unit =
|
||||||
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
||||||
|
|
||||||
|
if (recognizer_config.model_config.modeling_unit.empty()) {
|
||||||
|
recognizer_config.model_config.modeling_unit = "cjkchar";
|
||||||
|
}
|
||||||
|
|
||||||
recognizer_config.model_config.bpe_vocab =
|
recognizer_config.model_config.bpe_vocab =
|
||||||
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
||||||
|
|
||||||
recognizer_config.decoding_method =
|
recognizer_config.decoding_method =
|
||||||
SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
|
SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
|
||||||
|
if (recognizer_config.decoding_method.empty()) {
|
||||||
|
recognizer_config.decoding_method = "greedy_search";
|
||||||
|
}
|
||||||
|
|
||||||
recognizer_config.max_active_paths =
|
recognizer_config.max_active_paths =
|
||||||
SHERPA_ONNX_OR(config->max_active_paths, 4);
|
SHERPA_ONNX_OR(config->max_active_paths, 4);
|
||||||
|
|
||||||
@@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig(
|
|||||||
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||||
recognizer_config.model_config.provider =
|
recognizer_config.model_config.provider =
|
||||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||||
|
if (recognizer_config.model_config.provider.empty()) {
|
||||||
|
recognizer_config.model_config.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
recognizer_config.model_config.model_type =
|
recognizer_config.model_config.model_type =
|
||||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||||
recognizer_config.model_config.modeling_unit =
|
recognizer_config.model_config.modeling_unit =
|
||||||
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
||||||
|
|
||||||
|
if (recognizer_config.model_config.modeling_unit.empty()) {
|
||||||
|
recognizer_config.model_config.modeling_unit = "cjkchar";
|
||||||
|
}
|
||||||
|
|
||||||
recognizer_config.model_config.bpe_vocab =
|
recognizer_config.model_config.bpe_vocab =
|
||||||
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
||||||
|
|
||||||
@@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
|
|||||||
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||||
spotter_config.model_config.provider_config.provider =
|
spotter_config.model_config.provider_config.provider =
|
||||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||||
|
if (spotter_config.model_config.provider_config.provider.empty()) {
|
||||||
|
spotter_config.model_config.provider_config.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
spotter_config.model_config.model_type =
|
spotter_config.model_config.model_type =
|
||||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||||
spotter_config.model_config.debug =
|
spotter_config.model_config.debug =
|
||||||
@@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
|
|||||||
vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
|
vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
|
||||||
vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||||
vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||||
|
if (vad_config.provider.empty()) {
|
||||||
|
vad_config.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
|
vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
|
||||||
|
|
||||||
if (vad_config.debug) {
|
if (vad_config.debug) {
|
||||||
@@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
|||||||
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
tts_config.model.debug = config->model.debug;
|
tts_config.model.debug = config->model.debug;
|
||||||
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
if (tts_config.model.provider.empty()) {
|
||||||
|
tts_config.model.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
|
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
|
||||||
tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
|
tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
|
||||||
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
|
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
|
||||||
@@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification(
|
|||||||
slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||||
slid_config.debug = config->debug;
|
slid_config.debug = config->debug;
|
||||||
slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||||
|
if (slid_config.provider.empty()) {
|
||||||
|
slid_config.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
if (slid_config.debug) {
|
if (slid_config.debug) {
|
||||||
SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
|
||||||
@@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
|
|||||||
c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||||
c.debug = SHERPA_ONNX_OR(config->debug, 0);
|
c.debug = SHERPA_ONNX_OR(config->debug, 0);
|
||||||
c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||||
|
if (c.provider.empty()) {
|
||||||
|
c.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
if (config->debug) {
|
if (config->debug) {
|
||||||
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
||||||
@@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
|
|||||||
ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
ac.model.debug = config->model.debug;
|
ac.model.debug = config->model.debug;
|
||||||
ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
if (ac.model.provider.empty()) {
|
||||||
|
ac.model.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
ac.labels = SHERPA_ONNX_OR(config->labels, "");
|
ac.labels = SHERPA_ONNX_OR(config->labels, "");
|
||||||
ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);
|
ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);
|
||||||
|
|
||||||
@@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation(
|
|||||||
c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
c.model.debug = config->model.debug;
|
c.model.debug = config->model.debug;
|
||||||
c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
if (c.model.provider.empty()) {
|
||||||
|
c.model.provider = "cpu";
|
||||||
|
}
|
||||||
|
|
||||||
if (c.model.debug) {
|
if (c.model.debug) {
|
||||||
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
||||||
|
|||||||
@@ -4,6 +4,9 @@ unit sherpa_onnx;
|
|||||||
|
|
||||||
{$mode objfpc}
|
{$mode objfpc}
|
||||||
|
|
||||||
|
{$modeSwitch advancedRecords} { to support records with methods }
|
||||||
|
(* {$LongStrings ON} *)
|
||||||
|
|
||||||
interface
|
interface
|
||||||
|
|
||||||
type
|
type
|
||||||
@@ -12,15 +15,117 @@ type
|
|||||||
SampleRate: Integer;
|
SampleRate: Integer;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Pascal-side settings for a streaming transducer model: one string each
  for the encoder, decoder and joiner (presumably model file names;
  confirm against callers). }
TSherpaOnnxOnlineTransducerModelConfig = record
  Encoder, Decoder, Joiner: AnsiString;
  { Returns a printable description of all three fields. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Pascal-side settings for a streaming paraformer model: an encoder and a
  decoder string. }
TSherpaOnnxOnlineParaformerModelConfig = record
  Encoder, Decoder: AnsiString;
  { Returns a printable description of both fields. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Pascal-side settings for a streaming Zipformer2 CTC model: a single
  model string. }
TSherpaOnnxOnlineZipformer2CtcModelConfig = record
  Model: AnsiString;
  { Returns a printable description of the record. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Aggregated model settings of the streaming recognizer. It bundles the
  three per-architecture sub-configurations with the options shared by
  all of them. }
TSherpaOnnxOnlineModelConfig = record
  { Per-architecture sub-configurations. }
  Transducer: TSherpaOnnxOnlineTransducerModelConfig;
  Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
  Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;

  { Options common to every architecture. }
  Tokens: AnsiString;
  NumThreads: Integer;
  Provider: AnsiString;
  Debug: Boolean;
  ModelType, ModelingUnit, BpeVocab: AnsiString;

  { Returns a printable description, including the nested records. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Feature-extraction settings: the sample rate and the feature dimension. }
TSherpaOnnxFeatureConfig = record
  SampleRate, FeatureDim: Integer;
  { Returns a printable description of both fields. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Settings of the CTC FST decoder: a graph string and a MaxActive limit. }
TSherpaOnnxOnlineCtcFstDecoderConfig = record
  Graph: AnsiString;
  MaxActive: Integer;
  { Returns a printable description of both fields. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Complete configuration accepted by TSherpaOnnxOnlineRecognizer.Create.
  Every field is copied one-to-one into the C-side configuration record
  when the recognizer is constructed. }
TSherpaOnnxOnlineRecognizerConfig = record
  FeatConfig: TSherpaOnnxFeatureConfig;
  ModelConfig: TSherpaOnnxOnlineModelConfig;

  { Decoding options. }
  DecodingMethod: AnsiString;
  MaxActivePaths: Integer;

  { Endpoint options. }
  EnableEndpoint: Boolean;
  Rule1MinTrailingSilence, Rule2MinTrailingSilence,
    Rule3MinUtteranceLength: Single;

  { Hotword options. }
  HotwordsFile: AnsiString;
  HotwordsScore: Single;

  CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
  RuleFsts, RuleFars: AnsiString;
  BlankPenalty: Single;

  { Returns a printable description, including the nested records. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Result returned by TSherpaOnnxOnlineRecognizer.GetResult. The fields are
  filled from the 'text', 'tokens' and 'timestamps' members of the JSON
  document produced by the library. }
TSherpaOnnxOnlineRecognizerResult = record
  Text: AnsiString;
  Tokens: array of AnsiString;
  Timestamps: array of Single;
  { Returns a printable description of the result. }
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Wrapper around a native stream handle. Instances are produced by
  TSherpaOnnxOnlineRecognizer.CreateStream; the destructor hands the
  handle back to the library. }
TSherpaOnnxOnlineStream = class
private
  { Opaque pointer obtained from the C API. }
  Handle: Pointer;
public
  { Stores P as the native handle of this stream. }
  constructor Create(P: Pointer);
  { Passes the native handle to SherpaOnnxDestroyOnlineStream. }
  destructor Destroy; override;
  { Forwards Samples, recorded at SampleRate, to the native stream. }
  procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  { Forwards to SherpaOnnxOnlineStreamInputFinished. }
  procedure InputFinished;
end;
|
||||||
|
|
||||||
|
{ Wrapper around a native streaming recognizer handle. Every method that
  takes a stream forwards both the recognizer handle and the stream handle
  to the corresponding C API function. }
TSherpaOnnxOnlineRecognizer = class
private
  { Opaque pointer obtained from SherpaOnnxCreateOnlineRecognizer. }
  Handle: Pointer;
public
  { Converts Config to its C representation and creates the recognizer. }
  constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
  { Passes the native handle to SherpaOnnxDestroyOnlineRecognizer. }
  destructor Destroy; override;

  { Stream creation; the second overload also passes a hotwords string. }
  function CreateStream: TSherpaOnnxOnlineStream; overload;
  function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;

  { True when the C API reports 1 for the stream. }
  function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
  procedure Decode(Stream: TSherpaOnnxOnlineStream);
  procedure Reset(Stream: TSherpaOnnxOnlineStream);
  { True when the C API reports 1 for the stream. }
  function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
  { Parses the JSON result of the stream into a Pascal record. }
  function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
end;
|
||||||
|
|
||||||
{ It supports reading a single channel wave with 16-bit encoded samples.
|
{ It supports reading a single channel wave with 16-bit encoded samples.
|
||||||
Samples are normalized to the range [-1, 1].
|
Samples are normalized to the range [-1, 1].
|
||||||
}
|
}
|
||||||
function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave;
|
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses
|
uses
|
||||||
ctypes;
|
ctypes,
|
||||||
|
fpjson,
|
||||||
|
{ See
|
||||||
|
- https://wiki.freepascal.org/fcl-json
|
||||||
|
- https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
|
||||||
|
}
|
||||||
|
jsonparser,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
const
|
const
|
||||||
{See https://www.freepascal.org/docs-html/prog/progap7.html}
|
{See https://www.freepascal.org/docs-html/prog/progap7.html}
|
||||||
@@ -47,31 +152,383 @@ type
|
|||||||
|
|
||||||
PSherpaOnnxWave = ^SherpaOnnxWave;
|
PSherpaOnnxWave = ^SherpaOnnxWave;
|
||||||
|
|
||||||
|
{ Records passed to the C library. Strings are PAnsiChar and integers and
  flags are cint32. Adjacent fields of the same type are declared together;
  the field order itself is unchanged.
  NOTE(review): the order presumably mirrors the C structs - confirm
  against the C header before reordering anything. }
SherpaOnnxOnlineTransducerModelConfig = record
  Encoder, Decoder, Joiner: PAnsiChar;
end;

SherpaOnnxOnlineParaformerModelConfig = record
  Encoder, Decoder: PAnsiChar;
end;

SherpaOnnxOnlineZipformer2CtcModelConfig = record
  Model: PAnsiChar;
end;

SherpaOnnxOnlineModelConfig = record
  Transducer: SherpaOnnxOnlineTransducerModelConfig;
  Paraformer: SherpaOnnxOnlineParaformerModelConfig;
  Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
  Tokens: PAnsiChar;
  NumThreads: cint32;
  Provider: PAnsiChar;
  Debug: cint32;
  ModelType, ModelingUnit, BpeVocab: PAnsiChar;
end;

SherpaOnnxFeatureConfig = record
  SampleRate, FeatureDim: cint32;
end;

SherpaOnnxOnlineCtcFstDecoderConfig = record
  Graph: PAnsiChar;
  MaxActive: cint32;
end;

SherpaOnnxOnlineRecognizerConfig = record
  FeatConfig: SherpaOnnxFeatureConfig;
  ModelConfig: SherpaOnnxOnlineModelConfig;
  DecodingMethod: PAnsiChar;
  MaxActivePaths, EnableEndpoint: cint32;
  Rule1MinTrailingSilence, Rule2MinTrailingSilence,
    Rule3MinUtteranceLength: Single;
  HotwordsFile: PAnsiChar;
  HotwordsScore: Single;
  CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
  RuleFsts, RuleFars: PAnsiChar;
  BlankPenalty: Single;
end;

{ Pointer type used by SherpaOnnxCreateOnlineRecognizer. }
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
|
||||||
|
|
||||||
|
{ Bindings to the streaming-recognition functions exported by the library
  named by SherpaOnnxLibName. All of them use the cdecl calling convention
  and exchange recognizers and streams as opaque pointers. }

{ Recognizer lifetime. }
function SherpaOnnxCreateOnlineRecognizer(
  Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Stream lifetime. }
function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer;
  Hotwords: PAnsiChar): Pointer; cdecl;
  external SherpaOnnxLibName;

{ The argument is a stream handle (it is called with
  TSherpaOnnxOnlineStream.Handle), so it is named accordingly. }
procedure SherpaOnnxDestroyOnlineStream(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Feeding audio. }
procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Decoding. }
function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer;
  Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer;
  Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer;
  Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer;
  Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

{ Results. The string returned by the first function is handed back to the
  second one by TSherpaOnnxOnlineRecognizer.GetResult. }
function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer;
  Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  external SherpaOnnxLibName;
|
||||||
|
|
||||||
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
|
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
|
||||||
external SherpaOnnxLibName name 'SherpaOnnxReadWave';
|
external SherpaOnnxLibName name 'SherpaOnnxReadWave';
|
||||||
|
|
||||||
procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
|
procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
|
||||||
external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
|
external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
|
||||||
|
|
||||||
function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave;
|
{ Reads the wave file Filename through the C API and copies its samples
  and sample rate into a Pascal-managed TSherpaOnnxWave.

  If the C API returns nil, the result has no samples and a SampleRate of
  0 instead of dereferencing the nil pointer.
  NOTE(review): assumes the C API reports a read failure by returning
  nil - confirm against the C header. }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
  I: Integer;
begin
  { Start from a well-defined empty result. }
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));
  if PWave = nil then
    Exit;

  try
    SetLength(Result.Samples, PWave^.NumSamples);
    Result.SampleRate := PWave^.SampleRate;

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := PWave^.Samples[I];
  finally
    { Release the native wave even if copying raised (e.g. out of memory). }
    SherpaOnnxFreeWaveWrapper(PWave);
  end;
end;
|
||||||
|
|
||||||
|
{ Formats the three fields as
  'TSherpaOnnxOnlineTransducerModelConfig(Encoder := ..., ...)'. }
function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)';
begin
  Result := Format(Template, [Encoder, Decoder, Joiner]);
end;
|
||||||
|
|
||||||
|
{ Formats both fields as
  'TSherpaOnnxOnlineParaformerModelConfig(Encoder := ..., Decoder := ...)'. }
function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)';
begin
  Result := Format(Template, [Encoder, Decoder]);
end;
|
||||||
|
|
||||||
|
{ Formats the record as
  'TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := ...)'. }
function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)';
begin
  Result := Format(Template, [Model]);
end;
|
||||||
|
|
||||||
|
{ Returns a printable description of the record. Nested records are
  rendered with their own ToString and Debug is rendered through the
  Boolean helper. Every field is separated by ', ' (the Paraformer entry
  previously lacked the space after its comma). }
function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
    'Paraformer := %s, ' +
    'Zipformer2Ctc := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s)'
    ,
    [Self.Transducer.ToString, Self.Paraformer.ToString,
     Self.Zipformer2Ctc.ToString, Self.Tokens,
     Self.NumThreads, Self.Provider, Self.Debug.ToString,
     Self.ModelType, Self.ModelingUnit, Self.BpeVocab
    ]);
end;
|
||||||
|
|
||||||
|
{ Formats both integers as
  'TSherpaOnnxFeatureConfig(SampleRate := ..., FeatureDim := ...)'. }
function TSherpaOnnxFeatureConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)';
begin
  Result := Format(Template, [SampleRate, FeatureDim]);
end;
|
||||||
|
|
||||||
|
{ Formats the record as
  'TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := ..., MaxActive := ...)'. }
function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)';
begin
  Result := Format(Template, [Graph, MaxActive]);
end;
|
||||||
|
|
||||||
|
{ Returns a printable description of the whole recognizer configuration.
  Nested records are rendered with their own ToString, EnableEndpoint is
  rendered through the Boolean helper, and floating-point fields are
  printed with one decimal. The first label is spelled 'FeatConfig' to
  match the field name (it used to read 'FeatConfg'). }
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'EnableEndpoint := %s, ' +
    'Rule1MinTrailingSilence := %.1f, ' +
    'Rule2MinTrailingSilence := %.1f, ' +
    'Rule3MinUtteranceLength := %.1f, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'CtcFstDecoderConfig := %s, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f' +
    ')'
    ,
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
     Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
     Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty
    ]);
end;
|
||||||
|
|
||||||
|
{ Returns a printable description of the result. Tokens and Timestamps are
  rendered as bracketed, comma-separated lists; timestamps are printed
  with two decimals. The output no longer ends with a dangling ', '
  before the closing parenthesis. }
function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  TimestampStr: AnsiString;
  S: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  { Sep is empty for the first element and ', ' afterwards, so no
    separator is emitted before the first or after the last element. }
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;
|
||||||
|
|
||||||
|
{ Builds the C-side configuration from Config and creates the native
  recognizer. Strings are passed as PAnsiChar views of the AnsiString
  fields of Config and Booleans are converted with Ord. The pointer
  returned by the library is stored in Handle as is. }
constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
  C: SherpaOnnxOnlineRecognizerConfig;
begin
  Initialize(C);

  { Feature extraction. }
  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  { Per-architecture model settings. }
  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
  C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);
  C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);

  { Settings shared by all models. }
  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  { Decoding. }
  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.BlankPenalty := Config.BlankPenalty;

  { Endpointing. }
  C.EnableEndpoint := Ord(Config.EnableEndpoint);
  C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;

  { Hotwords. }
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;

  { CTC FST decoder and rule FSTs/FARs. }
  C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);

  Handle := SherpaOnnxCreateOnlineRecognizer(@C);
end;
|
||||||
|
|
||||||
|
{ Releases the native recognizer, if any, and then runs the inherited
  destructor. The nil check keeps the destructor safe when Handle was
  never set or has already been cleared. }
destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
|
||||||
|
|
||||||
|
{ Asks the library for a new stream bound to this recognizer and wraps the
  returned pointer in a TSherpaOnnxOnlineStream. }
function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStream(Handle));
end;
|
||||||
|
|
||||||
|
{ Same as the parameterless overload, but also passes Hotwords to the
  library when the stream is created. }
function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStreamWithHotwords(Handle, PAnsiChar(Hotwords)));
end;
|
||||||
|
|
||||||
|
{ Returns True exactly when SherpaOnnxIsOnlineStreamReady reports 1 for
  Stream. }
function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxIsOnlineStreamReady(Handle, Stream.Handle);
  Result := Status = 1;
end;
|
||||||
|
|
||||||
|
{ Forwards to SherpaOnnxDecodeOnlineStream with this recognizer's handle
  and the handle of Stream. }
procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
var
  StreamHandle: Pointer;
begin
  StreamHandle := Stream.Handle;
  SherpaOnnxDecodeOnlineStream(Handle, StreamHandle);
end;
|
||||||
|
|
||||||
|
{ Forwards to SherpaOnnxOnlineStreamReset with this recognizer's handle
  and the handle of Stream. }
procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
var
  StreamHandle: Pointer;
begin
  StreamHandle := Stream.Handle;
  SherpaOnnxOnlineStreamReset(Handle, StreamHandle);
end;
|
||||||
|
|
||||||
|
{ Returns True exactly when SherpaOnnxOnlineStreamIsEndpoint reports 1 for
  Stream. }
function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxOnlineStreamIsEndpoint(Handle, Stream.Handle);
  Result := Status = 1;
end;
|
||||||
|
|
||||||
|
{ Fetches the current result of Stream as JSON from the library and
  converts it into a TSherpaOnnxOnlineRecognizerResult.

  The JSON object is expected to contain a 'text' string and 'tokens' and
  'timestamps' arrays. The parsed JSON tree and the C string are both
  released before returning, including when parsing raises; the tree was
  previously leaked on every call. If the library returns nil, an empty
  result is returned.

  See
   - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html
}
function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonArray: TJSONArray;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  Result.Text := '';
  Result.Tokens := nil;
  Result.Timestamps := nil;

  pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);
  if pJson = nil then
    Exit;

  try
    JsonData := GetJSON(AnsiString(pJson), False);
    try
      JsonObject := JsonData as TJSONObject;

      Result.Text := JsonObject.Strings['text'];

      JsonArray := JsonObject.Arrays['tokens'];
      SetLength(Result.Tokens, JsonArray.Count);
      I := 0;
      for JsonEnum in JsonArray do
      begin
        Result.Tokens[I] := JsonEnum.Value.AsString;
        Inc(I);
      end;

      JsonArray := JsonObject.Arrays['timestamps'];
      SetLength(Result.Timestamps, JsonArray.Count);
      I := 0;
      for JsonEnum in JsonArray do
      begin
        Result.Timestamps[I] := JsonEnum.Value.AsFloat;
        Inc(I);
      end;
    finally
      { GetJSON transfers ownership of the tree to the caller.
        Free is safe on nil. }
      JsonData.Free;
    end;
  finally
    SherpaOnnxDestroyOnlineStreamResultJson(pJson);
  end;
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ Wraps the native stream pointer P; the destructor later passes it to
  SherpaOnnxDestroyOnlineStream. }
constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
  Handle := P;
end;
|
||||||
|
|
||||||
|
{ Releases the native stream, if any, and then runs the inherited
  destructor. The nil check keeps the destructor safe when the stream was
  created from a nil pointer or Handle has already been cleared. }
destructor TSherpaOnnxOnlineStream.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineStream(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
|
||||||
|
|
||||||
|
{ Hands Samples and their count to the native stream together with
  SampleRate.
  NOTE(review): Samples is a by-value open array, so the caller's data is
  copied for the call; a const parameter would avoid the copy, but it must
  be changed together with the class declaration. }
procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
var
  NumSamples: Integer;
begin
  NumSamples := Length(Samples);
  SherpaOnnxOnlineStreamAcceptWaveform(Handle, SampleRate,
    pcfloat(Samples), NumSamples);
end;
|
||||||
|
|
||||||
|
{ Forwards to SherpaOnnxOnlineStreamInputFinished for this stream's
  native handle. }
procedure TSherpaOnnxOnlineStream.InputFinished;
var
  StreamHandle: Pointer;
begin
  StreamHandle := Handle;
  SherpaOnnxOnlineStreamInputFinished(StreamHandle);
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|||||||
Reference in New Issue
Block a user