Add Pascal API for Dolphin CTC models (#2096)
This commit is contained in:
11
.github/workflows/pascal.yaml
vendored
11
.github/workflows/pascal.yaml
vendored
@@ -149,6 +149,11 @@ jobs:
|
|||||||
cd ./pascal-api-examples
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
pushd non-streaming-asr
|
pushd non-streaming-asr
|
||||||
|
|
||||||
|
./run-dolphin-ctc.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
./run-zipformer-transducer.sh
|
./run-zipformer-transducer.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
echo "---"
|
echo "---"
|
||||||
@@ -253,7 +258,13 @@ jobs:
|
|||||||
|
|
||||||
cd ./pascal-api-examples
|
cd ./pascal-api-examples
|
||||||
|
|
||||||
|
|
||||||
pushd vad-with-non-streaming-asr
|
pushd vad-with-non-streaming-asr
|
||||||
|
|
||||||
|
time ./run-vad-with-dolphin-ctc.sh
|
||||||
|
rm -rf sherpa-onnx-*
|
||||||
|
echo "---"
|
||||||
|
|
||||||
time ./run-vad-with-moonshine.sh
|
time ./run-vad-with-moonshine.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
echo "---"
|
echo "---"
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ This repository supports running the following functions **locally**
|
|||||||
|
|
||||||
on the following platforms and operating systems:
|
on the following platforms and operating systems:
|
||||||
|
|
||||||
- x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64)
|
- x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64), **RK NPU**
|
||||||
- Linux, macOS, Windows, openKylin
|
- Linux, macOS, Windows, openKylin
|
||||||
- Android, WearOS
|
- Android, WearOS
|
||||||
- iOS
|
- iOS
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ APIs with non-streaming models for speech recognition.
|
|||||||
|
|
||||||
|File|Description|
|
|File|Description|
|
||||||
|----|-----------|
|
|----|-----------|
|
||||||
|
|[run-dolphin-ctc.sh](./run-dolphin-ctc.sh)|Use a non-streaming [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model for speech recognition|
|
||||||
|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
|
|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
|
||||||
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
|
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
|
||||||
|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
|
|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
|
||||||
|
|||||||
76
pascal-api-examples/non-streaming-asr/dolphin_ctc.pas
Normal file
76
pascal-api-examples/non-streaming-asr/dolphin_ctc.pas
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Dolphin CTC model
|
||||||
|
to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program dolphin_ctc;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
DateUtils,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  { Describe the Dolphin CTC model to load. All paths are relative to the
    current working directory. }
  Initialize(Config);

  Config.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  { Stop early when no audio was obtained. Without this guard the duration
    below would be computed as a division by Wave.SampleRate with nothing to
    decode, and the recognizer would be fed an empty buffer.
    NOTE(review): assumes a failed read yields no samples and/or a
    non-positive sample rate - confirm against SherpaOnnxReadWave. }
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read audio samples from %s', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  { Only feeding the samples, decoding and fetching the result are timed;
    model loading above is excluded. }
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  { Real-time factor = processing time / audio duration, both in seconds. }
  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
|
||||||
42
pascal-api-examples/non-streaming-asr/run-dolphin-ctc.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-dolphin-ctc.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the Dolphin CTC model if it is missing, compiles dolphin_ctc.pas
# with fpc and runs the resulting program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)

# Derive every path from the repository root so that the existence check,
# the build and the fpc link step all look at the same directory.
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
MODEL_NAME=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The Pascal source and the model paths compiled into it are relative to this
# script's directory, so work from there regardless of the caller's cwd.
cd -- "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [[ ! -f "./$MODEL_NAME/model.int8.onnx" ]]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL_NAME.tar.bz2"
  tar xvf "$MODEL_NAME.tar.bz2"
  rm "$MODEL_NAME.tar.bz2"
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./dolphin_ctc.pas

# Append the previous value only when it is non-empty: a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./dolphin_ctc
|
||||||
@@ -6,7 +6,10 @@ with non-streaming speech recognition models.
|
|||||||
|
|
||||||
|File|Description|
|
|File|Description|
|
||||||
|---------|------------|
|
|---------|------------|
|
||||||
|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + Whisper for speech recognition.|
|
|[run-vad-with-dolphin-ctc.sh](./run-vad-with-dolphin-ctc.sh)|It shows how to use the VAD + [Dolphin](https://github.com/DataoceanAI/Dolphin) for speech recognition.|
|
||||||
|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + SenseVoice for speech recognition.|
|
|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + [Whisper](https://github.com/openai/whisper) for speech recognition.|
|
||||||
|
|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) for speech recognition.|
|
||||||
|
|[run-vad-with-moonshine.sh](./run-vad-with-moonshine.sh)|It shows how to use the VAD + [Moonshine](https://github.com/usefulsensors/moonshine) for speech recognition.|
|
||||||
|
|
||||||
|
|
||||||
Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models.
|
Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models.
|
||||||
|
|||||||
49
pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-dolphin-ctc.sh
Executable file
49
pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-dolphin-ctc.sh
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the silero VAD model, a test wave file and the Dolphin CTC model
# if they are missing, compiles vad_with_dolphin.pas with fpc and runs it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)

# Derive every path from the repository root so that the existence check,
# the build and the fpc link step all look at the same directory.
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
ASSET_URL=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
MODEL_NAME=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The Pascal source and the model paths compiled into it are relative to this
# script's directory, so work from there regardless of the caller's cwd.
cd -- "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O "$ASSET_URL/silero_vad.onnx"
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -SL -O "$ASSET_URL/lei-jun-test.wav"
fi

if [[ ! -f "./$MODEL_NAME/model.int8.onnx" ]]; then
  curl -SL -O "$ASSET_URL/$MODEL_NAME.tar.bz2"
  tar xvf "$MODEL_NAME.tar.bz2"
  rm "$MODEL_NAME.tar.bz2"
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./vad_with_dolphin.pas

# Append the previous value only when it is non-empty: a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./vad_with_dolphin
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||||
|
|
||||||
|
{
|
||||||
|
This file shows how to use a non-streaming Dolphin CTC model
|
||||||
|
with silero VAD to decode files.
|
||||||
|
|
||||||
|
You can download the model files from
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
}
|
||||||
|
|
||||||
|
program vad_with_dolphin;
|
||||||
|
|
||||||
|
{$mode objfpc}
|
||||||
|
|
||||||
|
uses
|
||||||
|
sherpa_onnx,
|
||||||
|
SysUtils;
|
||||||
|
|
||||||
|
{ Creates a voice activity detector backed by the silero VAD model stored in
  ./silero_vad.onnx. The caller owns the returned object and must free it. }
function CreateVad(): TSherpaOnnxVoiceActivityDetector;
const
  { Please don't change these two values unless you know the details }
  ExpectedSampleRate = 16000;
  SamplesPerWindow = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  { General runtime settings }
  VadConfig.SampleRate := ExpectedSampleRate;
  VadConfig.NumThreads := 1;
  VadConfig.Provider := 'cpu';
  VadConfig.Debug := True;

  { silero VAD specific settings }
  VadConfig.SileroVad.Model := './silero_vad.onnx';
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;
  VadConfig.SileroVad.WindowSize := SamplesPerWindow;

  { NOTE(review): the second argument is presumably the internal buffer size
    in seconds - confirm against the constructor's declaration. }
  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
|
||||||
|
|
||||||
|
{ Creates an offline (non-streaming) recognizer that uses the int8 Dolphin CTC
  model found under ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.
  The caller owns the returned object and must free it. }
function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Runtime settings }
  RecognizerConfig.ModelConfig.NumThreads := 1;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { Model files }
  RecognizerConfig.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  RecognizerConfig.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Decodes every speech segment currently queued in Vad and prints one line per
  segment in the form "<start> -- <end> <text>", with times in seconds.

  Vad         detector whose queued segments are consumed (each one is popped)
  Recognizer  recognizer used to decode each segment
  SampleRate  sample rate of the audio that was fed to Vad; used both for
              decoding and for converting sample counts to seconds }
procedure DecodeQueuedSegments(Vad: TSherpaOnnxVoiceActivityDetector;
  Recognizer: TSherpaOnnxOfflineRecognizer; SampleRate: Integer);
var
  SpeechSegment: TSherpaOnnxSpeechSegment;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    SpeechSegment := Vad.Front();
    Vad.Pop();

    { A fresh stream is created for every segment and freed below }
    Stream := Recognizer.CreateStream();

    Stream.AcceptWaveform(SpeechSegment.Samples, SampleRate);
    Recognizer.Decode(Stream);
    RecognitionResult := Recognizer.GetResult(Stream);

    Start := SpeechSegment.Start / SampleRate;
    Duration := Length(SpeechSegment.Samples) / SampleRate;
    WriteLn(Format('%.3f -- %.3f %s',
      [Start, Start + Duration, RecognitionResult.Text]));

    FreeAndNil(Stream);
  end;
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
  begin
    WriteLn(Format('Expected sample rate: %d. Given: %d',
      [Vad.Config.SampleRate, Wave.SampleRate]));

    { Release what was already created and report the failure through the
      exit status so that calling scripts can detect it. }
    FreeAndNil(Recognizer);
    FreeAndNil(Vad);
    Halt(1);
  end;

  { Feed the audio to the VAD one full window at a time and decode the speech
    segments as soon as they become available. A trailing partial window is
    not passed to AcceptWaveform. }
  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
  begin
    Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
    Offset += WindowSize;

    DecodeQueuedSegments(Vad, Recognizer, Wave.SampleRate);
  end;

  { After flushing, decode whatever segments are still queued }
  Vad.Flush;
  DecodeQueuedSegments(Vad, Recognizer, Wave.SampleRate);

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.
|
||||||
@@ -8,7 +8,7 @@ You can download the model files from
|
|||||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
}
|
}
|
||||||
|
|
||||||
program vad_with_whisper;
|
program vad_with_sense_voice;
|
||||||
|
|
||||||
{$mode objfpc}
|
{$mode objfpc}
|
||||||
|
|
||||||
|
|||||||
@@ -1969,7 +1969,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
|
|||||||
return p->impl->GetOutputSamplingRate();
|
return p->impl->GetOutputSamplingRate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) {
|
void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p) {
|
||||||
p->impl->Reset();
|
p->impl->Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -270,6 +270,11 @@ type
|
|||||||
function ToString: AnsiString;
|
function ToString: AnsiString;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
TSherpaOnnxOfflineDolphinModelConfig = record
|
||||||
|
Model: AnsiString;
|
||||||
|
function ToString: AnsiString;
|
||||||
|
end;
|
||||||
|
|
||||||
TSherpaOnnxOfflineWhisperModelConfig = record
|
TSherpaOnnxOfflineWhisperModelConfig = record
|
||||||
Encoder: AnsiString;
|
Encoder: AnsiString;
|
||||||
Decoder: AnsiString;
|
Decoder: AnsiString;
|
||||||
@@ -331,6 +336,7 @@ type
|
|||||||
SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
|
SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
|
||||||
Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
|
Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
|
||||||
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
|
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
|
||||||
|
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
|
||||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
||||||
function ToString: AnsiString;
|
function ToString: AnsiString;
|
||||||
end;
|
end;
|
||||||
@@ -694,6 +700,9 @@ type
|
|||||||
SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
|
SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
|
||||||
Model: PAnsiChar;
|
Model: PAnsiChar;
|
||||||
end;
|
end;
|
||||||
|
SherpaOnnxOfflineDolphinModelConfig = record
|
||||||
|
Model: PAnsiChar;
|
||||||
|
end;
|
||||||
SherpaOnnxOfflineWhisperModelConfig = record
|
SherpaOnnxOfflineWhisperModelConfig = record
|
||||||
Encoder: PAnsiChar;
|
Encoder: PAnsiChar;
|
||||||
Decoder: PAnsiChar;
|
Decoder: PAnsiChar;
|
||||||
@@ -740,6 +749,7 @@ type
|
|||||||
SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
|
SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
|
||||||
Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
|
Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
|
||||||
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
|
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
|
||||||
|
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
SherpaOnnxOfflineRecognizerConfig = record
|
SherpaOnnxOfflineRecognizerConfig = record
|
||||||
@@ -1461,6 +1471,12 @@ begin
|
|||||||
[Self.Model]);
|
[Self.Model]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Returns a human-readable description of this config that includes the
  value of Model. }
function TSherpaOnnxOfflineDolphinModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOfflineDolphinModelConfig(Model := %s)';
begin
  Result := Format(Layout, [Self.Model]);
end;
|
||||||
|
|
||||||
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
|
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
|
||||||
begin
|
begin
|
||||||
Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
|
Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
|
||||||
@@ -1534,14 +1550,15 @@ begin
|
|||||||
'TeleSpeechCtc := %s, ' +
|
'TeleSpeechCtc := %s, ' +
|
||||||
'SenseVoice := %s, ' +
|
'SenseVoice := %s, ' +
|
||||||
'Moonshine := %s, ' +
|
'Moonshine := %s, ' +
|
||||||
'FireRedAsr := %s' +
|
'FireRedAsr := %s, ' +
|
||||||
|
'Dolphin := %s' +
|
||||||
')',
|
')',
|
||||||
[Self.Transducer.ToString, Self.Paraformer.ToString,
|
[Self.Transducer.ToString, Self.Paraformer.ToString,
|
||||||
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
|
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
|
||||||
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
|
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
|
||||||
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
|
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
|
||||||
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
|
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
|
||||||
Self.FireRedAsr.ToString
|
Self.FireRedAsr.ToString, Self.Dolphin.ToString
|
||||||
]);
|
]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@@ -1610,6 +1627,8 @@ begin
|
|||||||
C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
|
C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
|
||||||
C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
|
C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
|
||||||
|
|
||||||
|
C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
|
||||||
|
|
||||||
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
|
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
|
||||||
C.LMConfig.Scale := Config.LMConfig.Scale;
|
C.LMConfig.Scale := Config.LMConfig.Scale;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user