Use a separate thread to initialize models for Lazarus examples. (#1270)
This keeps the main thread unblocked so that the user interface stays responsive.
This commit is contained in:
1
.github/workflows/lazarus.yaml
vendored
1
.github/workflows/lazarus.yaml
vendored
@@ -5,6 +5,7 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
- lazarus
|
- lazarus
|
||||||
|
- fix-lazarus
|
||||||
paths:
|
paths:
|
||||||
- '.github/workflows/lazarus.yaml'
|
- '.github/workflows/lazarus.yaml'
|
||||||
- 'CMakeLists.txt'
|
- 'CMakeLists.txt'
|
||||||
|
|||||||
@@ -160,6 +160,10 @@
|
|||||||
<Filename Value="my_worker.pas"/>
|
<Filename Value="my_worker.pas"/>
|
||||||
<IsPartOfProject Value="True"/>
|
<IsPartOfProject Value="True"/>
|
||||||
</Unit>
|
</Unit>
|
||||||
|
<Unit>
|
||||||
|
<Filename Value="my_init.pas"/>
|
||||||
|
<IsPartOfProject Value="True"/>
|
||||||
|
</Unit>
|
||||||
</Units>
|
</Units>
|
||||||
</ProjectOptions>
|
</ProjectOptions>
|
||||||
<CompilerOptions>
|
<CompilerOptions>
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ uses
|
|||||||
athreads,
|
athreads,
|
||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
Interfaces, // this includes the LCL widgetset
|
Interfaces, // this includes the LCL widgetset
|
||||||
Forms, unit1, my_worker
|
Forms, unit1, my_worker, my_init
|
||||||
{ you can add units after this };
|
{ you can add units after this };
|
||||||
|
|
||||||
{$R *.res}
|
{$R *.res}
|
||||||
|
|||||||
358
lazarus-examples/generate_subtitles/my_init.pas
Normal file
358
lazarus-examples/generate_subtitles/my_init.pas
Normal file
@@ -0,0 +1,358 @@
|
|||||||
|
unit my_init;
|
||||||
|
|
||||||
|
{$mode ObjFPC}{$H+}
|
||||||
|
|
||||||
|
interface
|
||||||
|
|
||||||
|
uses
|
||||||
|
{$IFDEF UNIX}
|
||||||
|
cthreads,
|
||||||
|
cmem,
|
||||||
|
{$ENDIF}
|
||||||
|
{$IFDEF HASAMIGA}
|
||||||
|
athreads,
|
||||||
|
{$ENDIF}
|
||||||
|
Classes, SysUtils;
|
||||||
|
|
||||||
|
type
|
||||||
|
{ Worker thread that performs model initialization off the main thread.
  Execute does the work; ShowStatus hands the resulting Status text to the
  form. }
TMyInitThread = class(TThread)
  private
    { Message that ShowStatus passes to Form1.UpdateInitStatus. }
    Status: AnsiString;
    { Prefix prepended to every model file name in Execute. }
    ModelDir: AnsiString;

    { Invoked through Synchronize from Execute. }
    procedure ShowStatus;

  protected
    procedure Execute; override;

  public
    constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString);
  end;
|
||||||
|
|
||||||
|
var
|
||||||
|
MyInitThread: TMyInitThread;
|
||||||
|
|
||||||
|
implementation
|
||||||
|
|
||||||
|
uses
|
||||||
|
unit1, sherpa_onnx;
|
||||||
|
|
||||||
|
{ Creates a voice activity detector that uses the Silero VAD model stored in
  VadFilename.

  The configuration uses a sample rate of 16000, a window size of 512, and
  0.5 for the threshold, the minimum speech duration and the minimum silence
  duration. It selects the 'cpu' provider with two threads and sets the
  Debug flag. The value 30 is forwarded as the second constructor argument. }
function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
const
  {Please don't change these unless you know the details}
  VadSampleRate = 16000;
  VadWindowSize = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  // General settings.
  VadConfig.SampleRate := VadSampleRate;
  VadConfig.Provider := 'cpu';
  VadConfig.NumThreads := 2;
  VadConfig.Debug := True;

  // Silero-specific settings.
  VadConfig.SileroVad.Model := VadFilename;
  VadConfig.SileroVad.WindowSize := VadWindowSize;
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;

  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
|
||||||
|
|
||||||
|
{ Creates an offline recognizer from the three files of a transducer model.

  Tokens is the token table; Encoder, Decoder and Joiner are the model
  files; ModelType is copied verbatim into the model configuration (the
  caller in this unit passes 'transducer' or 'nemo_transducer').
  The recognizer uses the 'cpu' provider with two threads and Debug off. }
function CreateOfflineRecognizerTransducer(
  Tokens: AnsiString;
  Encoder: AnsiString;
  Decoder: AnsiString;
  Joiner: AnsiString;
  ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  // Settings common to every model family.
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.ModelType := ModelType;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  // Transducer model files.
  RecognizerConfig.ModelConfig.Transducer.Encoder := Encoder;
  RecognizerConfig.ModelConfig.Transducer.Decoder := Decoder;
  RecognizerConfig.ModelConfig.Transducer.Joiner := Joiner;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Creates an offline recognizer for a TeleSpeech CTC model.

  Tokens is the token table and TeleSpeech is the model file. The recognizer
  uses the 'cpu' provider with two threads and Debug off. }
function CreateOfflineRecognizerTeleSpeech(
  Tokens: AnsiString;
  TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  // Settings common to every model family.
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  // Model file.
  RecognizerConfig.ModelConfig.TeleSpeechCtc := TeleSpeech;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Creates an offline recognizer for a Paraformer model.

  Tokens is the token table and Paraformer is the model file. The recognizer
  uses the 'cpu' provider with two threads and Debug off. }
function CreateOfflineRecognizerParaformer(
  Tokens: AnsiString;
  Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  // Settings common to every model family.
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  // Model file.
  RecognizerConfig.ModelConfig.Paraformer.Model := Paraformer;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Creates an offline recognizer for a SenseVoice model.

  Tokens is the token table and SenseVoice is the model file. Language is
  set to 'auto' and UseItn is switched on. The recognizer uses the 'cpu'
  provider with two threads and Debug off. }
function CreateOfflineRecognizerSenseVoice(
  Tokens: AnsiString;
  SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  // Settings common to every model family.
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  // SenseVoice-specific settings.
  RecognizerConfig.ModelConfig.SenseVoice.Model := SenseVoice;
  RecognizerConfig.ModelConfig.SenseVoice.Language := 'auto';
  RecognizerConfig.ModelConfig.SenseVoice.UseItn := True;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Creates an offline recognizer for a Whisper model.

  Tokens is the token table; WhisperEncoder and WhisperDecoder are the two
  model files. The recognizer uses the 'cpu' provider with two threads and
  Debug off. }
function CreateOfflineRecognizerWhisper(
  Tokens: AnsiString;
  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  // Settings common to every model family.
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  // Whisper model files.
  RecognizerConfig.ModelConfig.Whisper.Encoder := WhisperEncoder;
  RecognizerConfig.ModelConfig.Whisper.Decoder := WhisperDecoder;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
||||||
|
|
||||||
|
{ Creates the initialization thread.

  CreateSuspended is forwarded to TThread.Create. ModelDirectory is stored
  and later used by Execute as the prefix of every model file name. }
constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString);
begin
  inherited Create(CreateSuspended);

  // Let the thread object release itself when it terminates.
  FreeOnTerminate := True;
  ModelDir := ModelDirectory;
end;
|
||||||
|
|
||||||
|
{ Passes the current Status text to the form. Execute calls this only
  through Synchronize. }
procedure TMyInitThread.ShowStatus;
begin
  Form1.UpdateInitStatus(Self.Status);
end;
|
||||||
|
|
||||||
|
{ Thread body: initializes the VAD and one offline recognizer.

  All model file names are formed by appending a fixed file name to
  ModelDir. The VAD is created only when Form1.Vad is still nil. The
  recognizer is created from the first model family whose files exist,
  checked in this order: Whisper, SenseVoice, Paraformer, TeleSpeech,
  Zipformer transducer, NeMo transducer.

  Every outcome is reported to the form by storing a message in Status and
  calling Synchronize(@ShowStatus). An exception raised during
  initialization is caught and reported the same way, so that the form is
  always notified and the user can retry. }
procedure TMyInitThread.Execute;
var
  Msg: AnsiString;
  VadFilename: AnsiString;
  Tokens: AnsiString;

  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString;

  SenseVoice: AnsiString;

  Paraformer: AnsiString;

  TeleSpeech: AnsiString;

  TransducerEncoder: AnsiString; // from icefall
  TransducerDecoder: AnsiString;
  TransducerJoiner: AnsiString;

  NeMoTransducerEncoder: AnsiString;
  NeMoTransducerDecoder: AnsiString;
  NeMoTransducerJoiner: AnsiString;
begin
  VadFilename := ModelDir + 'silero_vad.onnx';
  Tokens := ModelDir + 'tokens.txt';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
    for a list of whisper models.

    In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
    You need to rename the existing model files.

    For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
      mv tiny.en-tokens.txt tokens.txt

      mv tiny.en-encoder.onnx whisper-encoder.onnx
      mv tiny.en-decoder.onnx whisper-decoder.onnx

    // or use the int8.onnx

      mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
      mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
  }
  WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
  WhisperDecoder := ModelDir + 'whisper-decoder.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
    to download models for SenseVoice.

    In the code, we use the normalized model name sense-voice.onnx. You have
    to rename the downloaded model files.

    For example, you need to use

      mv model.onnx sense-voice.onnx

    // or use the int8.onnx
      mv model.int8.onnx sense-voice.onnx
  }
  SenseVoice := ModelDir + 'sense-voice.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
    to download paraformer models.

    Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
    An example is given below for the rename:

      cp model.onnx paraformer.onnx

    // or use int8.onnx
      cp model.int8.onnx paraformer.onnx
  }
  Paraformer := ModelDir + 'paraformer.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
    to download TeleSpeech models.

    Note that you have to rename model files after downloading. The following
    is an example

      mv model.onnx telespeech.onnx

    // or to use int8.onnx

      mv model.int8.onnx telespeech.onnx
  }
  TeleSpeech := ModelDir + 'telespeech.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    to download an icefall offline transducer model. Note that you need to rename the
    model files to transducer-encoder.onnx, transducer-decoder.onnx, and
    transducer-joiner.onnx
  }
  TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
  TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
  TransducerJoiner := ModelDir + 'transducer-joiner.onnx';

  {
    Please visit
    https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    to download a NeMo transducer model.
  }
  NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
  NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
  NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';

  try
    if not FileExists(VadFilename) then
    begin
      Status := VadFilename + ' does not exist! Please download it from' +
        sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models';
      Synchronize(@ShowStatus);
      Exit;
    end;

    // A VAD created by an earlier attempt is kept and reused.
    if Form1.Vad = nil then
    begin
      Form1.Vad := CreateVad(VadFilename);
    end;

    if not FileExists(Tokens) then
    begin
      Status := Tokens + ' not found. Please download a non-streaming ASR model first!';
      Synchronize(@ShowStatus);
      Exit;
    end;

    if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
      Msg := 'Whisper';
    end
    else if FileExists(SenseVoice) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
      Msg := 'SenseVoice';
    end
    else if FileExists(Paraformer) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
      Msg := 'Paraformer';
    end
    else if FileExists(TeleSpeech) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
      Msg := 'TeleSpeech';
    end
    else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
        TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
      Msg := 'Zipformer transducer';
    end
    else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
        NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
      Msg := 'NeMo transducer';
    end
    else
    begin
      Status := 'Please download at least one non-streaming speech recognition model first.';
      Synchronize(@ShowStatus);
      Exit;
    end;

    // Keep this text unchanged (including its spelling): the form's
    // UpdateInitStatus recognizes success by the ending
    // 'model is initialized succesfully!'.
    Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!';
    Synchronize(@ShowStatus);
  except
    on E: Exception do
    begin
      // Without this handler the exception would end the thread without any
      // notification to the form. Reporting it as a non-success status lets
      // the form show the error and re-enable the init button.
      Status := 'Failed to initialize the model: ' + E.Message;
      Synchronize(@ShowStatus);
    end;
  end;
end;
|
||||||
|
|
||||||
|
end.
|
||||||
|
|
||||||
@@ -41,6 +41,7 @@ type
|
|||||||
StopTime: Single;
|
StopTime: Single;
|
||||||
TotalDuration: Single);
|
TotalDuration: Single);
|
||||||
procedure UpdateProgress(StopTime: Single; TotalDuration: Single);
|
procedure UpdateProgress(StopTime: Single; TotalDuration: Single);
|
||||||
|
procedure UpdateInitStatus(Status: AnsiString);
|
||||||
public
|
public
|
||||||
Vad: TSherpaOnnxVoiceActivityDetector;
|
Vad: TSherpaOnnxVoiceActivityDetector;
|
||||||
OfflineRecognizer: TSherpaOnnxOfflineRecognizer;
|
OfflineRecognizer: TSherpaOnnxOfflineRecognizer;
|
||||||
@@ -52,7 +53,8 @@ var
|
|||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses
|
uses
|
||||||
my_worker
|
my_worker,
|
||||||
|
my_init
|
||||||
{$IFDEF DARWIN}
|
{$IFDEF DARWIN}
|
||||||
,MacOSAll
|
,MacOSAll
|
||||||
,CocoaAll
|
,CocoaAll
|
||||||
@@ -76,128 +78,7 @@ begin
|
|||||||
end;
|
end;
|
||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
|
|
||||||
function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxVadModelConfig;
|
|
||||||
|
|
||||||
SampleRate: Integer;
|
|
||||||
WindowSize: Integer;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
SampleRate := 16000; {Please don't change it unless you know the details}
|
|
||||||
WindowSize := 512; {Please don't change it unless you know the details}
|
|
||||||
|
|
||||||
Config.SileroVad.Model := VadFilename;
|
|
||||||
Config.SileroVad.MinSpeechDuration := 0.5;
|
|
||||||
Config.SileroVad.MinSilenceDuration := 0.5;
|
|
||||||
Config.SileroVad.Threshold := 0.5;
|
|
||||||
Config.SileroVad.WindowSize := WindowSize;
|
|
||||||
Config.NumThreads:= 2;
|
|
||||||
Config.Debug:= True;
|
|
||||||
Config.Provider:= 'cpu';
|
|
||||||
Config.SampleRate := SampleRate;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
|
|
||||||
end;
|
|
||||||
|
|
||||||
function CreateOfflineRecognizerTransducer(
|
|
||||||
Tokens: AnsiString;
|
|
||||||
Encoder: AnsiString;
|
|
||||||
Decoder: AnsiString;
|
|
||||||
Joiner: AnsiString;
|
|
||||||
ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
Config.ModelConfig.Transducer.Encoder := Encoder;
|
|
||||||
Config.ModelConfig.Transducer.Decoder := Decoder;
|
|
||||||
Config.ModelConfig.Transducer.Joiner := Joiner;
|
|
||||||
|
|
||||||
Config.ModelConfig.ModelType := ModelType;
|
|
||||||
Config.ModelConfig.Tokens := Tokens;
|
|
||||||
Config.ModelConfig.Provider := 'cpu';
|
|
||||||
Config.ModelConfig.NumThreads := 2;
|
|
||||||
Config.ModelConfig.Debug := False;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
|
|
||||||
end;
|
|
||||||
|
|
||||||
function CreateOfflineRecognizerTeleSpeech(
|
|
||||||
Tokens: AnsiString;
|
|
||||||
TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
Config.ModelConfig.TeleSpeechCtc := TeleSpeech;
|
|
||||||
|
|
||||||
Config.ModelConfig.Tokens := Tokens;
|
|
||||||
Config.ModelConfig.Provider := 'cpu';
|
|
||||||
Config.ModelConfig.NumThreads := 2;
|
|
||||||
Config.ModelConfig.Debug := False;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
|
|
||||||
end;
|
|
||||||
|
|
||||||
function CreateOfflineRecognizerParaformer(
|
|
||||||
Tokens: AnsiString;
|
|
||||||
Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
Config.ModelConfig.Paraformer.Model := Paraformer;
|
|
||||||
|
|
||||||
Config.ModelConfig.Tokens := Tokens;
|
|
||||||
Config.ModelConfig.Provider := 'cpu';
|
|
||||||
Config.ModelConfig.NumThreads := 2;
|
|
||||||
Config.ModelConfig.Debug := False;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
|
|
||||||
end;
|
|
||||||
|
|
||||||
function CreateOfflineRecognizerSenseVoice(
|
|
||||||
Tokens: AnsiString;
|
|
||||||
SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
Config.ModelConfig.SenseVoice.Model := SenseVoice;
|
|
||||||
Config.ModelConfig.SenseVoice.Language := 'auto';
|
|
||||||
Config.ModelConfig.SenseVoice.UseItn := True;
|
|
||||||
Config.ModelConfig.Tokens := Tokens;
|
|
||||||
Config.ModelConfig.Provider := 'cpu';
|
|
||||||
Config.ModelConfig.NumThreads := 2;
|
|
||||||
Config.ModelConfig.Debug := False;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
|
|
||||||
end;
|
|
||||||
|
|
||||||
function CreateOfflineRecognizerWhisper(
|
|
||||||
Tokens: AnsiString;
|
|
||||||
WhisperEncoder: AnsiString;
|
|
||||||
WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
|
|
||||||
var
|
|
||||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
|
||||||
begin
|
|
||||||
Initialize(Config);
|
|
||||||
|
|
||||||
Config.ModelConfig.Whisper.Encoder := WhisperEncoder;
|
|
||||||
Config.ModelConfig.Whisper.Decoder := WhisperDecoder;
|
|
||||||
Config.ModelConfig.Tokens := Tokens;
|
|
||||||
Config.ModelConfig.Provider := 'cpu';
|
|
||||||
Config.ModelConfig.NumThreads := 2;
|
|
||||||
Config.ModelConfig.Debug := False;
|
|
||||||
|
|
||||||
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
|
|
||||||
end;
|
|
||||||
|
|
||||||
{$R *.lfm}
|
{$R *.lfm}
|
||||||
|
|
||||||
@@ -256,7 +137,7 @@ end;
|
|||||||
|
|
||||||
procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
|
procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
|
||||||
begin
|
begin
|
||||||
if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then
|
if (MyWorkerThread <> nil) and (not MyWorkerThread.Finished) then
|
||||||
begin
|
begin
|
||||||
MyWorkerThread.Terminate;
|
MyWorkerThread.Terminate;
|
||||||
MyWorkerThread.WaitFor;
|
MyWorkerThread.WaitFor;
|
||||||
@@ -310,29 +191,35 @@ begin
|
|||||||
Form1.ResultMemo.Lines.Add(NewResult);
|
Form1.ResultMemo.Lines.Add(NewResult);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Displays the outcome of model initialization.

  Status is the message produced by the initialization thread. When it ends
  with the success marker, the message and usage hints are appended to
  ResultMemo and the file selection controls are enabled. Any other message
  is treated as a failure: it is shown in a dialog, ResultMemo is replaced
  by download instructions, and InitBtn is re-enabled so the user can retry. }
procedure TForm1.UpdateInitStatus(Status: AnsiString);
begin
  // The marker below (including its spelling) must stay identical to the
  // ending of the success message built in TMyInitThread.Execute.
  if EndsStr('model is initialized succesfully!', Status) then
  begin
    ResultMemo.Lines.Add(Status);
    ResultMemo.Lines.Add('Please select a 16000Hz wave file to generate subtitles');
    ResultMemo.Lines.Add('You can download some test wave files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models');
    ResultMemo.Lines.Add('For instance:');
    ResultMemo.Lines.Add(' Chinese test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav');
    ResultMemo.Lines.Add(' English test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav');

    FileNameEdt.Enabled := True;
    SelectFileBtn.Enabled := True;
  end
  else
  begin
    ShowMessage(Status);

    ResultMemo.Lines.Clear();
    ResultMemo.Lines.Add('Please refer to');
    ResultMemo.Lines.Add('https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-models');
    ResultMemo.Lines.Add('for how to download models');

    InitBtn.Enabled := True;
  end;
end;
|
||||||
|
|
||||||
procedure TForm1.InitBtnClick(Sender: TObject);
|
procedure TForm1.InitBtnClick(Sender: TObject);
|
||||||
var
|
var
|
||||||
Msg: AnsiString;
|
|
||||||
ModelDir: AnsiString;
|
ModelDir: AnsiString;
|
||||||
VadFilename: AnsiString;
|
|
||||||
Tokens: AnsiString;
|
|
||||||
|
|
||||||
WhisperEncoder: AnsiString;
|
|
||||||
WhisperDecoder: AnsiString;
|
|
||||||
|
|
||||||
SenseVoice: AnsiString;
|
|
||||||
|
|
||||||
Paraformer: AnsiString;
|
|
||||||
|
|
||||||
TeleSpeech: AnsiString;
|
|
||||||
|
|
||||||
TransducerEncoder: AnsiString; // from icefall
|
|
||||||
TransducerDecoder: AnsiString;
|
|
||||||
TransducerJoiner: AnsiString;
|
|
||||||
|
|
||||||
NeMoTransducerEncoder: AnsiString;
|
|
||||||
NeMoTransducerDecoder: AnsiString;
|
|
||||||
NeMoTransducerJoiner: AnsiString;
|
|
||||||
begin
|
begin
|
||||||
{$IFDEF DARWIN}
|
{$IFDEF DARWIN}
|
||||||
ModelDir := GetResourcesPath;
|
ModelDir := GetResourcesPath;
|
||||||
@@ -340,161 +227,9 @@ begin
|
|||||||
ModelDir := './';
|
ModelDir := './';
|
||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
|
|
||||||
VadFilename := ModelDir + 'silero_vad.onnx';
|
Form1.ResultMemo.Lines.Clear();
|
||||||
Tokens := ModelDir + 'tokens.txt';
|
ResultMemo.Lines.Add('Initializing the model. Please wait...');
|
||||||
|
MyInitThread := TMyInitThread.Create(False, ModelDir);
|
||||||
{
|
|
||||||
Please refer to
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
|
|
||||||
for a list of whisper models.
|
|
||||||
|
|
||||||
In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
|
|
||||||
You need to rename the existing model files.
|
|
||||||
|
|
||||||
For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
|
|
||||||
mv tiny.en-tokens.txt tokens.txt
|
|
||||||
|
|
||||||
mv tiny.en-encoder.onnx whisper-encoder.onnx
|
|
||||||
mv tiny.en-decoder.onnx whisper-decoder.onnx
|
|
||||||
|
|
||||||
// or use the int8.onnx
|
|
||||||
|
|
||||||
mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
|
|
||||||
mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
|
|
||||||
}
|
|
||||||
WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
|
|
||||||
WhisperDecoder := ModelDir + 'whisper-decoder.onnx';
|
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
Please refer to
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
|
|
||||||
to download models for SenseVoice.
|
|
||||||
|
|
||||||
In the code, we use the normalized model name sense-voice.onnx. You have
|
|
||||||
to rename the downloaded model files.
|
|
||||||
|
|
||||||
For example, you need to use
|
|
||||||
|
|
||||||
mv model.onnx sense-voice.onnx
|
|
||||||
|
|
||||||
// or use the int8.onnx
|
|
||||||
mv model.int8.onnx sense-voice.onnx
|
|
||||||
}
|
|
||||||
|
|
||||||
SenseVoice := ModelDir + 'sense-voice.onnx';
|
|
||||||
|
|
||||||
{
|
|
||||||
Please refer to
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
|
|
||||||
to download paraformer models.
|
|
||||||
|
|
||||||
Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
|
|
||||||
An example is given below for the rename:
|
|
||||||
|
|
||||||
cp model.onnx paraformer.onnx
|
|
||||||
|
|
||||||
// or use int8.onnx
|
|
||||||
cp model.int8.onnx paraformer.onnx
|
|
||||||
}
|
|
||||||
Paraformer := ModelDir + 'paraformer.onnx';
|
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
please refer to
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
|
|
||||||
to download TeleSpeech models.
|
|
||||||
|
|
||||||
Note that you have to rename model files after downloading. The following
|
|
||||||
is an example
|
|
||||||
|
|
||||||
mv model.onnx telespeech.onnx
|
|
||||||
|
|
||||||
// or to use int8.onnx
|
|
||||||
|
|
||||||
mv model.int8.onnx telespeech.onnx
|
|
||||||
}
|
|
||||||
|
|
||||||
TeleSpeech := ModelDir + 'telespeech.onnx';
|
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
Please refer to
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|
||||||
to download an icefall offline transducer model. Note that you need to rename the
|
|
||||||
model files to transducer-encoder.onnx, transducer-decoder.onnx, and
|
|
||||||
transducer-joiner.onnx
|
|
||||||
}
|
|
||||||
TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
|
|
||||||
TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
|
|
||||||
TransducerJoiner := ModelDir + 'transducer-joiner.onnx';
|
|
||||||
|
|
||||||
{
|
|
||||||
Please visit
|
|
||||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
|
||||||
to donwload a NeMo transducer model.
|
|
||||||
}
|
|
||||||
NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
|
|
||||||
NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
|
|
||||||
NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';
|
|
||||||
|
|
||||||
if not FileExists(VadFilename) then
|
|
||||||
begin
|
|
||||||
ShowMessage(VadFilename + ' does not exist! Please download it from' +
|
|
||||||
sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'
|
|
||||||
);
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
Self.Vad := CreateVad(VadFilename);
|
|
||||||
|
|
||||||
if not FileExists(Tokens) then
|
|
||||||
begin
|
|
||||||
ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!');
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
|
|
||||||
Msg := 'Whisper';
|
|
||||||
end
|
|
||||||
else if FileExists(SenseVoice) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
|
|
||||||
Msg := 'SenseVoice';
|
|
||||||
end
|
|
||||||
else if FileExists(Paraformer) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
|
|
||||||
Msg := 'Paraformer';
|
|
||||||
end
|
|
||||||
else if FileExists(TeleSpeech) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
|
|
||||||
Msg := 'TeleSpeech';
|
|
||||||
end
|
|
||||||
else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
|
|
||||||
TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
|
|
||||||
Msg := 'Zipformer transducer';
|
|
||||||
end
|
|
||||||
else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
|
|
||||||
begin
|
|
||||||
OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
|
|
||||||
NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
|
|
||||||
Msg := 'NeMo transducer';
|
|
||||||
end
|
|
||||||
else
|
|
||||||
begin
|
|
||||||
ShowMessage('Please download at least one non-streaming speech recognition model first.');
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0);
|
|
||||||
FileNameEdt.Enabled := True;
|
|
||||||
SelectFileBtn.Enabled := True;
|
|
||||||
InitBtn.Enabled := False;
|
InitBtn.Enabled := False;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const {
|
|||||||
std::string SileroVadModelConfig::ToString() const {
|
std::string SileroVadModelConfig::ToString() const {
|
||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
|
|
||||||
os << "SilerVadModelConfig(";
|
os << "SileroVadModelConfig(";
|
||||||
os << "model=\"" << model << "\", ";
|
os << "model=\"" << model << "\", ";
|
||||||
os << "threshold=" << threshold << ", ";
|
os << "threshold=" << threshold << ", ";
|
||||||
os << "min_silence_duration=" << min_silence_duration << ", ";
|
os << "min_silence_duration=" << min_silence_duration << ", ";
|
||||||
|
|||||||
@@ -98,6 +98,7 @@ type
|
|||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
|
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
|
||||||
procedure InputFinished;
|
procedure InputFinished;
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TSherpaOnnxOnlineRecognizer = class
|
TSherpaOnnxOnlineRecognizer = class
|
||||||
@@ -116,6 +117,7 @@ type
|
|||||||
function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
|
function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
|
||||||
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
|
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
|
||||||
property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
|
property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TSherpaOnnxOfflineTransducerModelConfig = record
|
TSherpaOnnxOfflineTransducerModelConfig = record
|
||||||
@@ -213,6 +215,7 @@ type
|
|||||||
constructor Create(P: Pointer);
|
constructor Create(P: Pointer);
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
|
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TSherpaOnnxOfflineRecognizer = class
|
TSherpaOnnxOfflineRecognizer = class
|
||||||
@@ -226,6 +229,7 @@ type
|
|||||||
procedure Decode(Stream: TSherpaOnnxOfflineStream);
|
procedure Decode(Stream: TSherpaOnnxOfflineStream);
|
||||||
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
|
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
|
||||||
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
|
property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TSherpaOnnxSileroVadModelConfig = record
|
TSherpaOnnxSileroVadModelConfig = record
|
||||||
@@ -262,6 +266,7 @@ type
|
|||||||
procedure Reset;
|
procedure Reset;
|
||||||
function Size: Integer;
|
function Size: Integer;
|
||||||
function Head: Integer;
|
function Head: Integer;
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TSherpaOnnxSpeechSegment = record
|
TSherpaOnnxSpeechSegment = record
|
||||||
@@ -286,6 +291,7 @@ type
|
|||||||
procedure Reset;
|
procedure Reset;
|
||||||
procedure Flush;
|
procedure Flush;
|
||||||
property Config: TSherpaOnnxVadModelConfig Read _Config;
|
property Config: TSherpaOnnxVadModelConfig Read _Config;
|
||||||
|
property GetHandle: Pointer Read Handle;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ It supports reading a single channel wave with 16-bit encoded samples.
|
{ It supports reading a single channel wave with 16-bit encoded samples.
|
||||||
|
|||||||
Reference in New Issue
Block a user