Pascal API for non-streaming ASR (#1247)
This commit is contained in:
49
.github/workflows/pascal.yaml
vendored
49
.github/workflows/pascal.yaml
vendored
@@ -115,9 +115,11 @@ jobs:
|
|||||||
if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
|
if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
|
||||||
cp -v install/lib/*.dll ../pascal-api-examples/read-wav
|
cp -v install/lib/*.dll ../pascal-api-examples/read-wav
|
||||||
cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
|
cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
|
||||||
|
cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr
|
||||||
|
|
||||||
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
|
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
|
||||||
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
|
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
|
||||||
|
cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Run Pascal test (Read wav test)
|
- name: Run Pascal test (Read wav test)
|
||||||
@@ -133,6 +135,48 @@ jobs:
|
|||||||
ls -lh
|
ls -lh
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
- name: Run Pascal test (Non Streaming ASR)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
|
||||||
|
|
||||||
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
|
pushd non-streaming-asr
|
||||||
|
./run-zipformer-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-whisper.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-nemo-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-nemo-ctc.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-sense-voice.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-telespeech-ctc.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
./run-paraformer.sh
|
||||||
|
|
||||||
|
./run-paraformer-itn.sh
|
||||||
|
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
ls -lh
|
||||||
|
popd
|
||||||
|
|
||||||
- name: Run Pascal test (Streaming ASR)
|
- name: Run Pascal test (Streaming ASR)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -141,10 +185,15 @@ jobs:
|
|||||||
cd ./pascal-api-examples
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
pushd streaming-asr
|
pushd streaming-asr
|
||||||
|
|
||||||
./run-zipformer-transducer.sh
|
./run-zipformer-transducer.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
echo "---"
|
echo "---"
|
||||||
|
|
||||||
|
./run-nemo-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
|
if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
|
||||||
./run-paraformer.sh
|
./run-paraformer.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
|
|||||||
16
README.md
16
README.md
@@ -25,13 +25,17 @@
|
|||||||
|
|
||||||
### Supported programming languages
|
### Supported programming languages
|
||||||
|
|
||||||
| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java | 6. JavaScript |
|
| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java |
|
||||||
|--------|-------|-----------|-------|---------|---------------|
|
|--------|-------|-----------|-------|---------|
|
||||||
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
||||||
|
|
||||||
| 7. Kotlin | 8. Swift | 9. Go | 10. Dart | 11. Rust | 12. Pascal |
|
| 6. JavaScript | 7. Kotlin | 8. Swift | 9. Go | 10. Dart |
|
||||||
|-----------|----------|-------|----------|----------|------------|
|
|---------------|-----------|----------|-------|----------|
|
||||||
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
||||||
|
|
||||||
|
| 11. Rust | 12. Pascal |
|
||||||
|
|----------|------------|
|
||||||
|
| ✔️ | ✔️ |
|
||||||
|
|
||||||
For Rust support, please see https://github.com/thewh1teagle/sherpa-rs
|
For Rust support, please see https://github.com/thewh1teagle/sherpa-rs
|
||||||
|
|
||||||
|
|||||||
@@ -7,3 +7,4 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
|
|||||||
|---------|------------|
|
|---------|------------|
|
||||||
|[read-wav](./read-wav)|It shows how to read a wave file.|
|
|[read-wav](./read-wav)|It shows how to read a wave file.|
|
||||||
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
|
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
|
||||||
|
|[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.|
|
||||||
|
|||||||
9
pascal-api-examples/non-streaming-asr/.gitignore
vendored
Normal file
9
pascal-api-examples/non-streaming-asr/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
!run-*.sh
|
||||||
|
zipformer_transducer
|
||||||
|
whisper
|
||||||
|
nemo_transducer
|
||||||
|
nemo_ctc
|
||||||
|
paraformer
|
||||||
|
paraformer_itn
|
||||||
|
sense_voice
|
||||||
|
telespeech_ctc
|
||||||
15
pascal-api-examples/non-streaming-asr/README.md
Normal file
15
pascal-api-examples/non-streaming-asr/README.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Introduction
|
||||||
|
|
||||||
|
This folder contains examples of using sherpa-onnx's Object Pascal
|
||||||
|
APIs with non-streaming models for speech recognition.
|
||||||
|
|
||||||
|
|File|Description|
|
||||||
|
|----|-----------|
|
||||||
|
|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
|
||||||
|
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
|
||||||
|
|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
|
||||||
|
|[run-paraformer.sh](./run-paraformer.sh)|Use a non-streaming Paraformer model for speech recognition|
|
||||||
|
|[run-sense-voice.sh](./run-sense-voice.sh)|Use a non-streaming SenseVoice model for speech recognition|
|
||||||
|
|[run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|Use a non-streaming TeleSpeech CTC model for speech recognition|
|
||||||
|
|[run-whisper.sh](./run-whisper.sh)|Use a Whisper model for speech recognition|
|
||||||
|
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a non-streaming Zipformer transducer model for speech recognition|
|
||||||
74
pascal-api-examples/non-streaming-asr/nemo_ctc.pas
Normal file
74
pascal-api-examples/non-streaming-asr/nemo_ctc.pas
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming NeMo CTC model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program nemo_ctc;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
|
||||||
|
Config: TSherpaOnnxOfflineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOfflineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOfflineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leaks.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
77
pascal-api-examples/non-streaming-asr/nemo_transducer.pas
Normal file
77
pascal-api-examples/non-streaming-asr/nemo_transducer.pas
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming NeMo transducer model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program nemo_transducer;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
|
||||||
|
Config: TSherpaOnnxOfflineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOfflineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOfflineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx';
|
||||||
|
Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx';
|
||||||
|
Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx';
|
||||||
|
Config.ModelConfig.ModelType := 'nemo_transducer';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leaks.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
74
pascal-api-examples/non-streaming-asr/paraformer.pas
Normal file
74
pascal-api-examples/non-streaming-asr/paraformer.pas
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Paraformer model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program paraformer;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
|
||||||
|
Config: TSherpaOnnxOfflineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOfflineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOfflineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
|
||||||
|
WaveFilename := './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leaks.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
75
pascal-api-examples/non-streaming-asr/paraformer_itn.pas
Normal file
75
pascal-api-examples/non-streaming-asr/paraformer_itn.pas
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Paraformer model
|
||||||
|
to decode files with inverse text normalization for numbers.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program paraformer_itn;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
|
||||||
|
Wave: TSherpaOnnxWave;
|
||||||
|
WaveFilename: AnsiString;
|
||||||
|
|
||||||
|
Config: TSherpaOnnxOfflineRecognizerConfig;
|
||||||
|
Recognizer: TSherpaOnnxOfflineRecognizer;
|
||||||
|
Stream: TSherpaOnnxOfflineStream;
|
||||||
|
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
|
Start: TDateTime;
|
||||||
|
Stop: TDateTime;
|
||||||
|
|
||||||
|
Elapsed: Single;
|
||||||
|
Duration: Single;
|
||||||
|
RealTimeFactor: Single;
|
||||||
|
begin
|
||||||
|
Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
|
||||||
|
Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
|
||||||
|
Config.ModelConfig.Provider := 'cpu';
|
||||||
|
Config.ModelConfig.NumThreads := 1;
|
||||||
|
Config.ModelConfig.Debug := False;
|
||||||
|
Config.RuleFsts := './itn_zh_number.fst';
|
||||||
|
|
||||||
|
WaveFilename := './itn-zh-number.wav';
|
||||||
|
|
||||||
|
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||||
|
|
||||||
|
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
|
||||||
|
Stream := Recognizer.CreateStream();
|
||||||
|
Start := Now;
|
||||||
|
|
||||||
|
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||||
|
Recognizer.Decode(Stream);
|
||||||
|
|
||||||
|
RecognitionResult := Recognizer.GetResult(Stream);
|
||||||
|
|
||||||
|
Stop := Now;
|
||||||
|
|
||||||
|
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||||
|
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||||
|
RealTimeFactor := Elapsed / Duration;
|
||||||
|
|
||||||
|
WriteLn(RecognitionResult.ToString);
|
||||||
|
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||||
|
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||||
|
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||||
|
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||||
|
|
||||||
|
{Free resources to avoid memory leaks.
|
||||||
|
|
||||||
|
Note: You don't need to invoke them for this simple script.
|
||||||
|
However, you have to invoke them in your own large/complex project.
|
||||||
|
}
|
||||||
|
FreeAndNil(Stream);
|
||||||
|
FreeAndNil(Recognizer);
|
||||||
|
end.
|
||||||
41
pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh
Executable file
41
pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./nemo_ctc.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./nemo_ctc
|
||||||
42
pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
|
||||||
|
tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./nemo_transducer.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./nemo_transducer
|
||||||
50
pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh
Executable file
50
pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh
Executable file
@@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
|
||||||
|
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./itn-zh-number.wav ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./itn_zh_number.fst ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./paraformer_itn.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./paraformer_itn
|
||||||
42
pascal-api-examples/non-streaming-asr/run-paraformer.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-paraformer.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
|
||||||
|
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./paraformer.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./paraformer
|
||||||
41
pascal-api-examples/non-streaming-asr/run-sense-voice.sh
Executable file
41
pascal-api-examples/non-streaming-asr/run-sense-voice.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./sense_voice.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./sense_voice
|
||||||
42
pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||||
|
|
||||||
|
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||||
|
|
||||||
|
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||||
|
mkdir -p ../../build
|
||||||
|
pushd ../../build
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=./install \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||||
|
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=ON \
|
||||||
|
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||||
|
..
|
||||||
|
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
ls -lh lib
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
|
||||||
|
|
||||||
|
tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
|
||||||
|
rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
fpc \
|
||||||
|
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||||
|
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||||
|
./telespeech_ctc.pas
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./telespeech_ctc
|
||||||
42
pascal-api-examples/non-streaming-asr/run-whisper.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-whisper.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library when it is missing, downloads the
# Whisper tiny.en model when it is missing, then compiles and runs
# whisper.pas with the Free Pascal compiler.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$( cd -- "$SCRIPT_DIR/../.." && pwd )

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model directory, the .pas source and the model paths hard-coded in the
# Pascal program are all relative, so always work from this script's directory
# regardless of where the script was invoked from.
cd -- "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Configure, build and install the C API only when no installed shared library
# (.dylib, .so or .dll) is found.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" \
   && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" \
   && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Download and unpack the model only when its tokens file is not present yet.
MODEL=sherpa-onnx-whisper-tiny.en
if [ ! -f "./$MODEL/tiny.en-tokens.txt" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL.tar.bz2"

  tar xvf "$MODEL.tar.bz2"
  rm "$MODEL.tar.bz2"
fi

# -Fu: unit search path (sherpa_onnx.pas), -Fl: library search path.
fpc \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./whisper.pas

# Prepend the install dir to the loader search paths. The ${VAR:+:$VAR} form
# avoids a trailing ':' (an empty search-path entry) when the variable was unset.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./whisper
|
||||||
42
pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library when it is missing, downloads the
# Zipformer GigaSpeech transducer model when it is missing, then compiles and
# runs zipformer_transducer.pas with the Free Pascal compiler.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$( cd -- "$SCRIPT_DIR/../.." && pwd )

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model directory, the .pas source and the model paths hard-coded in the
# Pascal program are all relative, so always work from this script's directory
# regardless of where the script was invoked from.
cd -- "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Configure, build and install the C API only when no installed shared library
# (.dylib, .so or .dll) is found.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" \
   && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" \
   && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Download and unpack the model only when its tokens file is not present yet.
MODEL=sherpa-onnx-zipformer-gigaspeech-2023-12-12
if [ ! -f "./$MODEL/tokens.txt" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL.tar.bz2"

  tar xvf "$MODEL.tar.bz2"
  rm "$MODEL.tar.bz2"
fi

# -Fu: unit search path (sherpa_onnx.pas), -Fl: library search path.
fpc \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./zipformer_transducer.pas

# Prepend the install dir to the loader search paths. The ${VAR:+:$VAR} form
# avoids a trailing ':' (an empty search-path entry) when the variable was unset.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./zipformer_transducer
|
||||||
76
pascal-api-examples/non-streaming-asr/sense_voice.pas
Normal file
76
pascal-api-examples/non-streaming-asr/sense_voice.pas
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming SenseVoice model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program sense_voice;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;   {decoding wall-clock time in seconds}
  Duration: Single;  {length of the input audio in seconds}
  RealTimeFactor: Single;
begin
  {Same explicit initialization as the streaming examples use.}
  Initialize(Config);

  Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx';
  Config.ModelConfig.SenseVoice.Language := 'auto';
  Config.ModelConfig.SenseVoice.UseItn := False;
  Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Duration and the real-time factor below divide by the sample rate and by
   the duration, so stop early on an empty wave instead of dividing by zero.
   NOTE(review): presumably an unreadable file yields an empty wave - confirm
   against SherpaOnnxReadWave.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read %s (no samples or invalid sample rate)', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  {Only feeding, decoding and fetching the result are timed.}
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leaks.

   Note: this is not strictly needed for a short script like this one, but
   you have to do it in your own large/complex project.}
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
74
pascal-api-examples/non-streaming-asr/telespeech_ctc.pas
Normal file
74
pascal-api-examples/non-streaming-asr/telespeech_ctc.pas
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming TeleSpeech CTC model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program telespeech_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;   {decoding wall-clock time in seconds}
  Duration: Single;  {length of the input audio in seconds}
  RealTimeFactor: Single;
begin
  {Same explicit initialization as the streaming examples use.}
  Initialize(Config);

  Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Duration and the real-time factor below divide by the sample rate and by
   the duration, so stop early on an empty wave instead of dividing by zero.
   NOTE(review): presumably an unreadable file yields an empty wave - confirm
   against SherpaOnnxReadWave.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read %s (no samples or invalid sample rate)', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  {Only feeding, decoding and fetching the result are timed.}
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leaks.

   Note: this is not strictly needed for a short script like this one, but
   you have to do it in your own large/complex project.}
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
75
pascal-api-examples/non-streaming-asr/whisper.pas
Normal file
75
pascal-api-examples/non-streaming-asr/whisper.pas
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Whisper model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program whisper;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;   {decoding wall-clock time in seconds}
  Duration: Single;  {length of the input audio in seconds}
  RealTimeFactor: Single;
begin
  {Same explicit initialization as the streaming examples use.}
  Initialize(Config);

  Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Duration and the real-time factor below divide by the sample rate and by
   the duration, so stop early on an empty wave instead of dividing by zero.
   NOTE(review): presumably an unreadable file yields an empty wave - confirm
   against SherpaOnnxReadWave.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read %s (no samples or invalid sample rate)', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  {Only feeding, decoding and fetching the result are timed.}
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leaks.

   Note: this is not strictly needed for a short script like this one, but
   you have to do it in your own large/complex project.}
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Zipformer transducer
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program zipformer_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;   {decoding wall-clock time in seconds}
  Duration: Single;  {length of the input audio in seconds}
  RealTimeFactor: Single;
begin
  {Same explicit initialization as the streaming examples use.}
  Initialize(Config);

  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1089-134686-0001.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Duration and the real-time factor below divide by the sample rate and by
   the duration, so stop early on an empty wave instead of dividing by zero.
   NOTE(review): presumably an unreadable file yields an empty wave - confirm
   against SherpaOnnxReadWave.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read %s (no samples or invalid sample rate)', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  {Only feeding, decoding and fetching the result are timed.}
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leaks.

   Note: this is not strictly needed for a short script like this one, but
   you have to do it in your own large/complex project.}
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
2
pascal-api-examples/streaming-asr/.gitignore
vendored
2
pascal-api-examples/streaming-asr/.gitignore
vendored
@@ -1,4 +1,6 @@
|
|||||||
|
!run-*.sh
|
||||||
zipformer_transducer
|
zipformer_transducer
|
||||||
paraformer
|
paraformer
|
||||||
zipformer_ctc
|
zipformer_ctc
|
||||||
zipformer_ctc_hlg
|
zipformer_ctc_hlg
|
||||||
|
nemo_transducer
|
||||||
|
|||||||
@@ -9,3 +9,4 @@ APIs with streaming models for speech recognition.
|
|||||||
|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
|
|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
|
||||||
|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
|
|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
|
||||||
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition|
|
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition|
|
||||||
|
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a NeMo transducer model for speech recognition|
|
||||||
|
|||||||
89
pascal-api-examples/streaming-asr/nemo_transducer.pas
Normal file
89
pascal-api-examples/streaming-asr/nemo_transducer.pas
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{ Copyright (c) 2024 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a streaming NeMo transducer
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program nemo_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;   {decoding wall-clock time in seconds}
  Duration: Single;  {length of the input audio in seconds, padding excluded}
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
   to download the model files used in this file.}
  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/encoder.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/decoder.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/joiner.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Duration and the real-time factor below divide by the sample rate and by
   the duration, so stop early on an empty wave instead of dividing by zero.
   NOTE(review): presumably an unreadable file yields an empty wave - confirm
   against SherpaOnnxReadWave.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read %s (no samples or invalid sample rate)', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  {Stream creation is included in the timed section.}
  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  {SetLength zero-fills the new dynamic array, so this appends silence.}
  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  {Keep decoding for as long as the recognizer reports the stream as ready.}
  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leaks.

   Note: this is not strictly needed for a short script like this one, but
   you have to do it in your own large/complex project.}
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
41
pascal-api-examples/streaming-asr/run-nemo-transducer.sh
Executable file
41
pascal-api-examples/streaming-asr/run-nemo-transducer.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library when it is missing, downloads the
# streaming NeMo fast-conformer transducer model when it is missing, then
# compiles and runs nemo_transducer.pas with the Free Pascal compiler.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$( cd -- "$SCRIPT_DIR/../.." && pwd )

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model directory, the .pas source and the model paths hard-coded in the
# Pascal program are all relative, so always work from this script's directory
# regardless of where the script was invoked from.
cd -- "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

# Configure, build and install the C API only when no installed shared library
# (.dylib, .so or .dll) is found.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" \
   && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" \
   && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Download and unpack the model only when its tokens file is not present yet.
MODEL=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
if [ ! -f "./$MODEL/tokens.txt" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL.tar.bz2"
  tar xvf "$MODEL.tar.bz2"
  rm "$MODEL.tar.bz2"
fi

# -Fu: unit search path (sherpa_onnx.pas), -Fl: library search path.
fpc \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./nemo_transducer.pas

# Prepend the install dir to the loader search paths. The ${VAR:+:$VAR} form
# avoids a trailing ':' (an empty search-path entry) when the variable was unset.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./nemo_transducer
|
||||||
@@ -110,6 +110,109 @@ type
|
|||||||
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
|
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ High-level (AnsiString based) configuration records for non-streaming
  recognition. Each record offers ToString, which returns an AnsiString. }

{ The three model files of a non-streaming transducer. }
TSherpaOnnxOfflineTransducerModelConfig = record
  Encoder, Decoder, Joiner: AnsiString;
  function ToString: AnsiString;
end;

{ Single model file of a non-streaming Paraformer. }
TSherpaOnnxOfflineParaformerModelConfig = record
  Model: AnsiString;
  function ToString: AnsiString;
end;

{ Single model file of a non-streaming NeMo EncDec CTC model. }
TSherpaOnnxOfflineNemoEncDecCtcModelConfig = record
  Model: AnsiString;
  function ToString: AnsiString;
end;

{ Whisper encoder/decoder files plus decoding options. }
TSherpaOnnxOfflineWhisperModelConfig = record
  Encoder, Decoder: AnsiString;
  Language, Task: AnsiString;
  TailPaddings: Integer;
  function ToString: AnsiString;
end;

{ Single model file of a TDNN model. }
TSherpaOnnxOfflineTdnnModelConfig = record
  Model: AnsiString;
  function ToString: AnsiString;
end;

{ Language model file and its scale. }
TSherpaOnnxOfflineLMConfig = record
  Model: AnsiString;
  Scale: Single;
  function ToString: AnsiString;
end;

{ SenseVoice model file, language selection and the UseItn switch. }
TSherpaOnnxOfflineSenseVoiceModelConfig = record
  Model, Language: AnsiString;
  UseItn: Boolean;
  function ToString: AnsiString;
end;

{ Aggregates the per-model-family sub-configs with the settings that are
  shared by all of them (tokens file, thread count, provider, ...). }
TSherpaOnnxOfflineModelConfig = record
  Transducer: TSherpaOnnxOfflineTransducerModelConfig;
  Paraformer: TSherpaOnnxOfflineParaformerModelConfig;
  NeMoCtc: TSherpaOnnxOfflineNemoEncDecCtcModelConfig;
  Whisper: TSherpaOnnxOfflineWhisperModelConfig;
  Tdnn: TSherpaOnnxOfflineTdnnModelConfig;
  Tokens: AnsiString;
  NumThreads: Integer;
  Debug: Boolean;
  Provider, ModelType, ModelingUnit, BpeVocab, TeleSpeechCtc: AnsiString;
  SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
  function ToString: AnsiString;
end;

{ Top-level configuration handed to TSherpaOnnxOfflineRecognizer.Create. }
TSherpaOnnxOfflineRecognizerConfig = record
  FeatConfig: TSherpaOnnxFeatureConfig;
  ModelConfig: TSherpaOnnxOfflineModelConfig;
  LMConfig: TSherpaOnnxOfflineLMConfig;
  DecodingMethod: AnsiString;
  MaxActivePaths: Integer;
  HotwordsFile: AnsiString;
  HotwordsScore: Single;
  RuleFsts, RuleFars: AnsiString;
  BlankPenalty: Single;
  function ToString: AnsiString;
end;

{ Recognition output: the text together with token and timestamp arrays. }
TSherpaOnnxOfflineRecognizerResult = record
  Text: AnsiString;
  Tokens: array of AnsiString;
  Timestamps: array of Single;
  function ToString: AnsiString;
end;
|
||||||
|
|
||||||
|
{ Non-streaming input stream. Instances are obtained from
  TSherpaOnnxOfflineRecognizer.CreateStream. }
TSherpaOnnxOfflineStream = class
private
  { NOTE(review): presumably the native stream pointer received in Create -
    confirm against the implementation. }
  Handle: Pointer;
public
  constructor Create(P: Pointer);
  destructor Destroy; override;

  { Feeds audio samples recorded at SampleRate into the stream. }
  procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
end;

{ Non-streaming recognizer: create a stream, decode it, fetch its result. }
TSherpaOnnxOfflineRecognizer = class
private
  { NOTE(review): presumably the native recognizer pointer - confirm against
    the implementation. }
  Handle: Pointer;
public
  constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig);
  destructor Destroy; override;

  function CreateStream: TSherpaOnnxOfflineStream;
  procedure Decode(Stream: TSherpaOnnxOfflineStream);
  function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
end;
|
||||||
|
|
||||||
{ It supports reading a single channel wave with 16-bit encoded samples.
|
{ It supports reading a single channel wave with 16-bit encoded samples.
|
||||||
Samples are normalized to the range [-1, 1].
|
Samples are normalized to the range [-1, 1].
|
||||||
}
|
}
|
||||||
@@ -204,6 +307,68 @@ type
|
|||||||
|
|
||||||
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
|
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
|
||||||
|
|
||||||
|
{ Low-level counterparts of the TSherpaOnnxOffline* records, expressed with
  C-compatible field types (PAnsiChar, cint32, Single). A pointer to
  SherpaOnnxOfflineRecognizerConfig is what SherpaOnnxCreateOfflineRecognizer
  receives.
  NOTE(review): field order and types presumably have to match the C API
  header exactly - do not reorder; confirm before changing anything. }

SherpaOnnxOfflineTransducerModelConfig = record
  Encoder, Decoder, Joiner: PAnsiChar;
end;

SherpaOnnxOfflineParaformerModelConfig = record
  Model: PAnsiChar;
end;

SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
  Model: PAnsiChar;
end;

SherpaOnnxOfflineWhisperModelConfig = record
  Encoder, Decoder, Language, Task: PAnsiChar;
  TailPaddings: cint32;
end;

SherpaOnnxOfflineTdnnModelConfig = record
  Model: PAnsiChar;
end;

SherpaOnnxOfflineLMConfig = record
  Model: PAnsiChar;
  Scale: Single;
end;

SherpaOnnxOfflineSenseVoiceModelConfig = record
  Model, Language: PAnsiChar;
  UseItn: cint32;  { integer here, Boolean in the TSherpaOnnx* record }
end;

SherpaOnnxOfflineModelConfig = record
  Transducer: SherpaOnnxOfflineTransducerModelConfig;
  Paraformer: SherpaOnnxOfflineParaformerModelConfig;
  NeMoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig;
  Whisper: SherpaOnnxOfflineWhisperModelConfig;
  Tdnn: SherpaOnnxOfflineTdnnModelConfig;
  Tokens: PAnsiChar;
  NumThreads, Debug: cint32;
  Provider, ModelType, ModelingUnit, BpeVocab, TeleSpeechCtc: PAnsiChar;
  SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
end;

SherpaOnnxOfflineRecognizerConfig = record
  FeatConfig: SherpaOnnxFeatureConfig;
  ModelConfig: SherpaOnnxOfflineModelConfig;
  LMConfig: SherpaOnnxOfflineLMConfig;
  DecodingMethod: PAnsiChar;
  MaxActivePaths: cint32;
  HotwordsFile: PAnsiChar;
  HotwordsScore: Single;
  RuleFsts, RuleFars: PAnsiChar;
  BlankPenalty: Single;
end;

PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig;
|
||||||
|
|
||||||
function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
|
function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
|
||||||
external SherpaOnnxLibName;
|
external SherpaOnnxLibName;
|
||||||
|
|
||||||
@@ -244,6 +409,31 @@ function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Poin
|
|||||||
procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
|
procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
|
||||||
external SherpaOnnxLibName;
|
external SherpaOnnxLibName;
|
||||||
|
|
||||||
|
{ cdecl imports from SherpaOnnxLibName for non-streaming recognition.
  Recognizers and streams are passed around as untyped pointers. }

{ Recognizer lifetime. }
function SherpaOnnxCreateOfflineRecognizer(
  Config: PSherpaOnnxOfflineRecognizerConfig): Pointer;
  cdecl; external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineRecognizer(Recognizer: Pointer);
  cdecl; external SherpaOnnxLibName;

{ Stream lifetime. }
function SherpaOnnxCreateOfflineStream(Recognizer: Pointer): Pointer;
  cdecl; external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineStream(Stream: Pointer);
  cdecl; external SherpaOnnxLibName;

{ Feeding audio and decoding. Samples points at N floats. }
procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32);
  cdecl; external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer);
  cdecl; external SherpaOnnxLibName;

{ Result retrieval as JSON text, with the matching release routine for the
  returned string. }
function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar;
  cdecl; external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar);
  cdecl; external SherpaOnnxLibName;
|
||||||
|
|
||||||
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
|
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
|
||||||
external SherpaOnnxLibName name 'SherpaOnnxReadWave';
|
external SherpaOnnxLibName name 'SherpaOnnxReadWave';
|
||||||
|
|
||||||
@@ -322,7 +512,7 @@ end;
|
|||||||
|
|
||||||
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
|
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
|
||||||
begin
|
begin
|
||||||
Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfg := %s, ' +
|
Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
|
||||||
'ModelConfig := %s, ' +
|
'ModelConfig := %s, ' +
|
||||||
'DecodingMethod := %s, ' +
|
'DecodingMethod := %s, ' +
|
||||||
'MaxActivePaths := %d, ' +
|
'MaxActivePaths := %d, ' +
|
||||||
@@ -375,7 +565,7 @@ begin
|
|||||||
|
|
||||||
Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
|
Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
|
||||||
'Tokens := %s, ' +
|
'Tokens := %s, ' +
|
||||||
'Timestamps := %s, ' +
|
'Timestamps := %s' +
|
||||||
')',
|
')',
|
||||||
[Self.Text, TokensStr, TimestampStr]);
|
[Self.Text, TokensStr, TimestampStr]);
|
||||||
end;
|
end;
|
||||||
@@ -531,4 +721,268 @@ begin
|
|||||||
SherpaOnnxOnlineStreamInputFinished(Self.Handle);
|
SherpaOnnxOnlineStreamInputFinished(Self.Handle);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the Encoder, Decoder and Joiner fields. }
function TSherpaOnnxOfflineTransducerModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineTransducerModelConfig(Encoder := %s, ' +
    'Decoder := %s, Joiner := %s)';
begin
  Result := Format(Layout, [Encoder, Decoder, Joiner]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineParaformerModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineParaformerModelConfig(Model := %s)';
begin
  Result := Format(Layout, [Model]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineNemoEncDecCtcModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineNemoEncDecCtcModelConfig(Model := %s)';
begin
  Result := Format(Layout, [Model]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the Whisper settings: Encoder, Decoder,
  Language, Task and TailPaddings (printed as an integer). }
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineWhisperModelConfig(Encoder := %s, ' +
    'Decoder := %s, Language := %s, Task := %s, TailPaddings := %d)';
begin
  Result := Format(Layout, [Encoder, Decoder, Language, Task, TailPaddings]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineTdnnModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineTdnnModelConfig(Model := %s)';
begin
  Result := Format(Layout, [Model]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the language-model settings; Scale is
  printed with one digit after the decimal point. }
function TSherpaOnnxOfflineLMConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineLMConfig(Model := %s, Scale := %.1f)';
begin
  Result := Format(Layout, [Model, Scale]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of Model, Language and UseItn (the latter
  rendered through its own ToString). }
function TSherpaOnnxOfflineSenseVoiceModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineSenseVoiceModelConfig(Model := %s, ' +
    'Language := %s, UseItn := %s)';
begin
  Result := Format(Layout, [Model, Language, UseItn.ToString]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the whole offline model configuration.
  Nested model configs and the Debug flag are rendered through their own
  ToString; the remaining fields are inserted directly. }
function TSherpaOnnxOfflineModelConfig.ToString: AnsiString;
begin
  Result := Format(
    'TSherpaOnnxOfflineModelConfig(Transducer := %s, Paraformer := %s, ' +
    'NeMoCtc := %s, Whisper := %s, Tdnn := %s, ' +
    'Tokens := %s, NumThreads := %d, Debug := %s, ' +
    'Provider := %s, ModelType := %s, ModelingUnit := %s, ' +
    'BpeVocab := %s, TeleSpeechCtc := %s, SenseVoice := %s)',
    [Transducer.ToString,
     Paraformer.ToString,
     NeMoCtc.ToString,
     Whisper.ToString,
     Tdnn.ToString,
     Tokens,
     NumThreads,
     Debug.ToString,
     Provider,
     ModelType,
     ModelingUnit,
     BpeVocab,
     TeleSpeechCtc,
     SenseVoice.ToString]);
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the offline recognizer configuration.
  FeatConfig, ModelConfig and LMConfig are rendered through their own ToString;
  HotwordsScore and BlankPenalty are printed with one digit after the decimal
  point. }
function TSherpaOnnxOfflineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineRecognizerConfig(' +
    'FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'LMConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    { Was '%1.f' (width 1, empty precision), which dropped the fractional
      digit; '%.1f' matches the HotwordsScore formatting above. }
    'BlankPenalty := %.1f' +
    ')',
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.LMConfig.ToString, Self.DecodingMethod, Self.MaxActivePaths,
     Self.HotwordsFile, Self.HotwordsScore, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty
    ]);
end;
|
||||||
|
|
||||||
|
{ Creates the native offline recognizer from a Pascal-side configuration.

  Every field of Config is copied into a C-layout SherpaOnnxOfflineRecognizerConfig
  record: strings are passed as PAnsiChar views of the Pascal strings, Booleans
  are converted with Ord, numbers are copied as-is. The record is then handed
  to SherpaOnnxCreateOfflineRecognizer and the returned pointer is stored in
  Handle.

  The PAnsiChar values point into the strings owned by Config, so they are only
  valid while Config is alive, i.e. for the duration of this call. }
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  C: SherpaOnnxOfflineRecognizerConfig;
begin
  { Initialize() only sets up managed-type fields, so it would leave the plain
    pointer/number fields of C undefined. Zero the whole record instead so any
    field not assigned below (and any padding) reaches the library as 0/nil. }
  FillChar(C, SizeOf(C), 0);

  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  C.ModelConfig.Paraformer.Model := PAnsiChar(Config.ModelConfig.Paraformer.Model);
  C.ModelConfig.NeMoCtc.Model := PAnsiChar(Config.ModelConfig.NeMoCtc.Model);

  C.ModelConfig.Whisper.Encoder := PAnsiChar(Config.ModelConfig.Whisper.Encoder);
  C.ModelConfig.Whisper.Decoder := PAnsiChar(Config.ModelConfig.Whisper.Decoder);
  C.ModelConfig.Whisper.Language := PAnsiChar(Config.ModelConfig.Whisper.Language);
  C.ModelConfig.Whisper.Task := PAnsiChar(Config.ModelConfig.Whisper.Task);
  C.ModelConfig.Whisper.TailPaddings := Config.ModelConfig.Whisper.TailPaddings;

  C.ModelConfig.Tdnn.Model := PAnsiChar(Config.ModelConfig.Tdnn.Model);

  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
  C.ModelConfig.TeleSpeechCtc := PAnsiChar(Config.ModelConfig.TeleSpeechCtc);

  C.ModelConfig.SenseVoice.Model := PAnsiChar(Config.ModelConfig.SenseVoice.Model);
  C.ModelConfig.SenseVoice.Language := PAnsiChar(Config.ModelConfig.SenseVoice.Language);
  C.ModelConfig.SenseVoice.UseItn := Ord(Config.ModelConfig.SenseVoice.UseItn);

  C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
  C.LMConfig.Scale := Config.LMConfig.Scale;

  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;

  Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
end;
|
||||||
|
|
||||||
|
{ Hands the recognizer handle to SherpaOnnxDestroyOfflineRecognizer and then
  clears the stored pointer. }
destructor TSherpaOnnxOfflineRecognizer.Destroy;
begin
  SherpaOnnxDestroyOfflineRecognizer(Handle);
  Handle := nil;
end;
|
||||||
|
|
||||||
|
{ Asks the library for a new stream bound to this recognizer and wraps the
  returned pointer in a TSherpaOnnxOfflineStream object. }
function TSherpaOnnxOfflineRecognizer.CreateStream: TSherpaOnnxOfflineStream;
begin
  Result := TSherpaOnnxOfflineStream.Create(
    SherpaOnnxCreateOfflineStream(Handle));
end;
|
||||||
|
|
||||||
|
{ Forwards this recognizer's handle and the given stream's handle to
  SherpaOnnxDecodeOfflineStream. }
procedure TSherpaOnnxOfflineRecognizer.Decode(Stream: TSherpaOnnxOfflineStream);
begin
  SherpaOnnxDecodeOfflineStream(Handle, Stream.Handle);
end;
|
||||||
|
|
||||||
|
{ Fetches the result of Stream from the library as JSON and converts it into a
  TSherpaOnnxOfflineRecognizerResult.

  The JSON is expected to be an object with a 'text' string, a 'tokens' array
  of strings and a 'timestamps' array of numbers; these populate Result.Text,
  Result.Tokens and Result.Timestamps respectively.

  Both the native JSON buffer and the parsed JSON tree are released before
  returning, including when parsing or field access raises. }
function TSherpaOnnxOfflineRecognizer.GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonArray: TJSONArray;
  I: Integer;
begin
  pJson := SherpaOnnxGetOfflineStreamResultAsJson(Stream.Handle);
  try
    { AnsiString(pJson) copies the text, so the native buffer can be released
      as soon as parsing has finished (or failed). }
    JsonData := GetJSON(AnsiString(pJson), False);
  finally
    SherpaOnnxDestroyOfflineStreamResultJson(pJson);
  end;

  try
    JsonObject := JsonData as TJSONObject;

    Result.Text := JsonObject.Strings['text'];

    JsonArray := JsonObject.Arrays['tokens'];
    SetLength(Result.Tokens, JsonArray.Count);
    for I := 0 to JsonArray.Count - 1 do
      Result.Tokens[I] := JsonArray.Items[I].AsString;

    JsonArray := JsonObject.Arrays['timestamps'];
    SetLength(Result.Timestamps, JsonArray.Count);
    for I := 0 to JsonArray.Count - 1 do
      Result.Timestamps[I] := JsonArray.Items[I].AsFloat;
  finally
    { GetJSON transfers ownership of the tree to the caller; without this the
      whole parsed document leaked on every call. }
    JsonData.Free;
  end;
end;
|
||||||
|
|
||||||
|
{ Wraps an existing stream pointer: P is stored as this object's Handle. }
constructor TSherpaOnnxOfflineStream.Create(P: Pointer);
begin
  Handle := P;
end;
|
||||||
|
|
||||||
|
{ Hands the stream handle to SherpaOnnxDestroyOfflineStream and then clears
  the stored pointer. }
destructor TSherpaOnnxOfflineStream.Destroy;
begin
  SherpaOnnxDestroyOfflineStream(Handle);
  Handle := nil;
end;
|
||||||
|
|
||||||
|
{ Passes the given samples to the native stream.
  Samples    - audio samples as an open array of Single.
  SampleRate - sample rate forwarded unchanged to the library.
  The sample count sent to the library is Length(Samples). }
procedure TSherpaOnnxOfflineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
begin
  SherpaOnnxAcceptWaveformOffline(
    Handle,
    SampleRate,
    pcfloat(Samples),
    Length(Samples));
end;
|
||||||
|
|
||||||
|
{ Returns a one-line textual dump of the recognition result.
  Tokens and Timestamps are each rendered as a bracketed list with ', '
  between entries; every timestamp is printed with two digits after the
  decimal point. }
function TSherpaOnnxOfflineRecognizerResult.ToString: AnsiString;
var
  TokenList: AnsiString;
  StampList: AnsiString;
  Stamp: Single;
  K: Integer;
begin
  TokenList := '[';
  for K := Low(Tokens) to High(Tokens) do
  begin
    { Separator goes before every entry except the first. }
    if K > Low(Tokens) then
      TokenList := TokenList + ', ';
    TokenList := TokenList + Tokens[K];
  end;
  TokenList := TokenList + ']';

  StampList := '[';
  for K := Low(Timestamps) to High(Timestamps) do
  begin
    if K > Low(Timestamps) then
      StampList := StampList + ', ';
    Stamp := Timestamps[K];
    StampList := StampList + Format('%.2f', [Stamp]);
  end;
  StampList := StampList + ']';

  Result := Format(
    'TSherpaOnnxOfflineRecognizerResult(Text := %s, Tokens := %s, Timestamps := %s)',
    [Text, TokenList, StampList]);
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user