503 lines
13 KiB
ObjectPascal
503 lines
13 KiB
ObjectPascal
unit Unit1;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
{$IFDEF DARWIN}
|
|
{$modeswitch objectivec1} {For getting resource directory}
|
|
{$ENDIF}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils, StrUtils, Forms, Controls,
|
|
Graphics, Dialogs, StdCtrls,
|
|
sherpa_onnx, ComCtrls;
|
|
|
|
type
|
|
|
|
{ TForm1 }
|
|
|
|
TForm1 = class(TForm)
  { Visual components (default visibility; the form resource is linked
    into this unit with the R directive further down). }
  InitBtn: TButton;
  ProgressBar: TProgressBar;
  ResultMemo: TMemo;
  StartBtn: TButton;
  SelectFileDlg: TOpenDialog;
  SelectFileBtn: TButton;
  FileNameEdt: TEdit;
  ProgressLabel: TLabel;

  { Event handlers }
  procedure FileNameEdtChange(Sender: TObject);
  procedure FormClose(Sender: TObject; var CloseAction: TCloseAction);
  procedure InitBtnClick(Sender: TObject);
  procedure SelectFileBtnClick(Sender: TObject);
  procedure FormCreate(Sender: TObject);
  procedure StartBtnClick(Sender: TObject);
public
  { Engine objects created by InitBtnClick and released in FormClose. }
  Vad: TSherpaOnnxVoiceActivityDetector;
  OfflineRecognizer: TSherpaOnnxOfflineRecognizer;

  { Appends one message to ResultMemo and refreshes the progress display. }
  procedure UpdateResult(
    Msg: AnsiString;
    StartTime: Single;
    StopTime: Single;
    TotalDuration: Single);

  { Sets the progress bar/label to StopTime / TotalDuration in percent. }
  procedure UpdateProgress(StopTime: Single; TotalDuration: Single);
end;
|
|
|
|
var
|
|
Form1: TForm1;
|
|
|
|
implementation
|
|
|
|
uses
|
|
my_worker
|
|
{$IFDEF DARWIN}
|
|
,MacOSAll
|
|
,CocoaAll
|
|
{$ENDIF}
|
|
;
|
|
{See https://wiki.lazarus.freepascal.org/Locating_the_macOS_application_resources_directory}
|
|
|
|
{$IFDEF DARWIN}
|
|
{Note: The returned path contains a trailing /}
|
|
{ Returns the application bundle's resource directory followed by PathDelim.

  The path is read from the NSString as UTF-8 instead of being copied through
  a 255-byte ShortString in the system encoding, so long paths and paths with
  non-ASCII characters are no longer truncated or rejected.

  Raises Exception when the main bundle reports no resource path. }
function GetResourcesPath(): AnsiString;
var
  ResourcePath: NSString;
begin
  ResourcePath := NSBundle.mainBundle.resourcePath;

  if ResourcePath = nil then
    raise Exception.Create('Error in GetResourcesPath()');

  Result := AnsiString(ResourcePath.UTF8String) + PathDelim;
end;
|
|
{$ENDIF}
|
|
|
|
{ Builds a voice activity detector from the silero VAD model stored in
  VadFilename. The caller owns the returned object. }
function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
const
  {Please don't change these unless you know the details}
  ExpectedSampleRate = 16000;
  SileroWindowSize = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  { Silero-specific settings }
  VadConfig.SileroVad.Model := VadFilename;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.WindowSize := SileroWindowSize;

  { General settings }
  VadConfig.NumThreads := 2;
  VadConfig.Debug := True;
  VadConfig.Provider := 'cpu';
  VadConfig.SampleRate := ExpectedSampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
|
|
|
|
{ Creates an offline recognizer for a transducer model.

  Tokens                    - path of tokens.txt
  Encoder, Decoder, Joiner  - paths of the three transducer model files
  ModelType                 - model type string forwarded to the model config
                              (this unit passes 'transducer' or 'nemo_transducer')

  The caller owns the returned object. }
function CreateOfflineRecognizerTransducer(
  Tokens: AnsiString;
  Encoder: AnsiString;
  Decoder: AnsiString;
  Joiner: AnsiString;
  ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model kind }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;
  RecognizerConfig.ModelConfig.ModelType := ModelType;

  { Transducer-specific model files }
  RecognizerConfig.ModelConfig.Transducer.Encoder := Encoder;
  RecognizerConfig.ModelConfig.Transducer.Decoder := Decoder;
  RecognizerConfig.ModelConfig.Transducer.Joiner := Joiner;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
|
|
|
{ Creates an offline recognizer for a TeleSpeech CTC model.

  Tokens     - path of tokens.txt
  TeleSpeech - path of the TeleSpeech model file

  The caller owns the returned object. }
function CreateOfflineRecognizerTeleSpeech(
  Tokens: AnsiString;
  TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model kind }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  { TeleSpeech-specific model file }
  RecognizerConfig.ModelConfig.TeleSpeechCtc := TeleSpeech;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
|
|
|
{ Creates an offline recognizer for a Paraformer model.

  Tokens     - path of tokens.txt
  Paraformer - path of the Paraformer model file

  The caller owns the returned object. }
function CreateOfflineRecognizerParaformer(
  Tokens: AnsiString;
  Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model kind }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  { Paraformer-specific model file }
  RecognizerConfig.ModelConfig.Paraformer.Model := Paraformer;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
|
|
|
{ Creates an offline recognizer for a SenseVoice model, with the language set
  to 'auto' and UseItn switched on.

  Tokens     - path of tokens.txt
  SenseVoice - path of the SenseVoice model file

  The caller owns the returned object. }
function CreateOfflineRecognizerSenseVoice(
  Tokens: AnsiString;
  SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model kind }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  { SenseVoice-specific settings }
  RecognizerConfig.ModelConfig.SenseVoice.Model := SenseVoice;
  RecognizerConfig.ModelConfig.SenseVoice.Language := 'auto';
  RecognizerConfig.ModelConfig.SenseVoice.UseItn := True;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
|
|
|
{ Creates an offline recognizer for a Whisper model.

  Tokens         - path of tokens.txt
  WhisperEncoder - path of the Whisper encoder model file
  WhisperDecoder - path of the Whisper decoder model file

  The caller owns the returned object. }
function CreateOfflineRecognizerWhisper(
  Tokens: AnsiString;
  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model kind }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Debug := False;

  { Whisper-specific model files }
  RecognizerConfig.ModelConfig.Whisper.Encoder := WhisperEncoder;
  RecognizerConfig.ModelConfig.Whisper.Decoder := WhisperDecoder;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
|
|
|
|
{$R *.lfm}
|
|
|
|
{ TForm1 }
|
|
|
|
{ Puts the form into its initial state: file selection and Start stay disabled
  until a model has been initialized, usage hints are shown in the memo, and
  the progress display is reset. }
procedure TForm1.FormCreate(Sender: TObject);
begin
  { Nothing can be processed before InitBtnClick has succeeded }
  StartBtn.Enabled := False;
  FileNameEdt.Enabled := False;
  SelectFileBtn.Enabled := False;

  SelectFileDlg.Filter := 'All Files|*.wav';

  { Usage hints }
  ResultMemo.Lines.Add('1. It supports only 1 channel, 16-bit, 16000Hz wav files');
  ResultMemo.Lines.Add('2. There should be no Chinese characters in the file path.');

  { Empty progress display }
  ProgressBar.Position := 0;
  ProgressLabel.Caption := '';
end;
|
|
|
|
{ Toggles between starting and stopping a recognition run. The button caption
  ('Start' / 'Stop') is used as the state indicator. }
procedure TForm1.StartBtnClick(Sender: TObject);
begin
  if StartBtn.Caption <> 'Stop' then
  begin
    { Start: reset the output and progress display, then launch the worker
      on the file named in the edit box. }
    ResultMemo.Lines.Clear();
    ResultMemo.Lines.Add('Start processing');

    ProgressBar.Position := 0;
    ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]);

    MyWorkerThread := TMyWorkerThread.Create(False, FileNameEdt.Text);

    StartBtn.Caption := 'Stop';
    Exit;
  end;

  { Stop: ask a still-running worker to terminate; this does not wait for it. }
  if MyWorkerThread <> nil then
  begin
    if not MyWorkerThread.Finished then
      MyWorkerThread.Terminate;
  end;

  StartBtn.Caption := 'Start';
end;
|
|
|
|
{ Shows the open-file dialog and copies the chosen file name into the edit
  box. Nothing changes when the dialog is cancelled. }
procedure TForm1.SelectFileBtnClick(Sender: TObject);
begin
  if not SelectFileDlg.Execute then
    Exit;

  FileNameEdt.Text := SelectFileDlg.FileName;
end;
|
|
|
|
{ Enables the Start button only while the edit box names an existing file. }
procedure TForm1.FileNameEdtChange(Sender: TObject);
begin
  StartBtn.Enabled := FileExists(FileNameEdt.Text);
end;
|
|
|
|
{ Stops a still-running worker thread and waits for it, then releases the
  detector and the recognizer. }
procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
begin
  if MyWorkerThread <> nil then
  begin
    if not MyWorkerThread.Finished then
    begin
      { The engine objects are only freed once the worker has ended. }
      MyWorkerThread.Terminate;
      MyWorkerThread.WaitFor;
    end;
  end;

  FreeAndNil(Vad);
  FreeAndNil(OfflineRecognizer);
end;
|
|
|
|
{ Shows StopTime / TotalDuration as a percentage on the progress bar and its
  label. The display is left untouched when either argument is 0. }
procedure TForm1.UpdateProgress(StopTime: Single; TotalDuration: Single);
var
  Percentage: Single;
begin
  { Nothing to show yet; this also avoids dividing by zero }
  if (StopTime = 0) or (TotalDuration = 0) then
    Exit;

  Percentage := StopTime / TotalDuration * 100;
  ProgressBar.Position := Round(Percentage);

  { The label shows the position as read back from the bar }
  ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]);
end;
|
|
|
|
{ Appends one result line to the memo and refreshes the progress display.

  Msg           - recognized text, or a status message. A status message is
                  'DONE!', 'Cancelled!', or text ending in
                  '16-bit encoded wave files' or '. Please select a new file'.
  StartTime,
  StopTime      - bounds of the segment; printed in front of recognized text
  TotalDuration - used together with StopTime to compute the progress

  A status message is shown verbatim and switches the Start/Stop button back
  to 'Start'. 'DONE!' additionally forces the progress display to 100%.

  The method acts on its own instance (Self) throughout, rather than mixing
  Self with the global Form1 variable.

  NOTE(review): presumably invoked by the worker thread on the main thread
  (e.g. via Synchronize) -- confirm in my_worker. }
procedure TForm1.UpdateResult(
  Msg: AnsiString;
  StartTime: Single;
  StopTime: Single;
  TotalDuration: Single);
var
  NewResult: AnsiString;
  IsStatusMessage: Boolean;
begin
  UpdateProgress(StopTime, TotalDuration);

  IsStatusMessage := (Msg = 'DONE!') or
    (Msg = 'Cancelled!') or
    EndsStr('16-bit encoded wave files', Msg) or
    EndsStr('. Please select a new file', Msg);

  if IsStatusMessage then
  begin
    { The run is over (finished, cancelled or failed): allow a new start }
    Self.StartBtn.Caption := 'Start';
    NewResult := Msg;
  end
  else
    NewResult := Format('%.3f -- %.3f %s', [StartTime, StopTime, Msg]);

  if Msg = 'DONE!' then
  begin
    Self.ProgressBar.Position := 100;
    Self.ProgressLabel.Caption := '100%';
  end;

  Self.ResultMemo.Lines.Add(NewResult);
end;
|
|
|
|
{ Locates the model files, creates the voice activity detector and the first
  offline recognizer whose model files are present, then enables file selection.

  Model files are looked up in the application resources directory on macOS
  and in './' elsewhere. Recognizer models are probed in this order: Whisper,
  SenseVoice, Paraformer, TeleSpeech, icefall (Zipformer) transducer, NeMo
  transducer.

  If a required file is missing, a message is shown and the handler returns
  with InitBtn still enabled so the user can retry. Because the detector is
  created before the recognizer checks, a detector left over from an earlier
  failed attempt is freed before a new one is created; otherwise every retry
  would leak one detector. }
procedure TForm1.InitBtnClick(Sender: TObject);
var
  Msg: AnsiString;
  ModelDir: AnsiString;
  VadFilename: AnsiString;
  Tokens: AnsiString;

  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString;

  SenseVoice: AnsiString;

  Paraformer: AnsiString;

  TeleSpeech: AnsiString;

  TransducerEncoder: AnsiString; // from icefall
  TransducerDecoder: AnsiString;
  TransducerJoiner: AnsiString;

  NeMoTransducerEncoder: AnsiString;
  NeMoTransducerDecoder: AnsiString;
  NeMoTransducerJoiner: AnsiString;
begin
  {$IFDEF DARWIN}
  ModelDir := GetResourcesPath;
  {$ELSE}
  ModelDir := './';
  {$ENDIF}

  VadFilename := ModelDir + 'silero_vad.onnx';
  Tokens := ModelDir + 'tokens.txt';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
    for a list of whisper models.

    In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
    You need to rename the existing model files.

    For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
      mv tiny.en-tokens.txt tokens.txt

      mv tiny.en-encoder.onnx whisper-encoder.onnx
      mv tiny.en-decoder.onnx whisper-decoder.onnx

      // or use the int8.onnx

      mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
      mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
  }
  WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
  WhisperDecoder := ModelDir + 'whisper-decoder.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
    to download models for SenseVoice.

    In the code, we use the normalized model name sense-voice.onnx. You have
    to rename the downloaded model files.

    For example, you need to use

      mv model.onnx sense-voice.onnx

      // or use the int8.onnx
      mv model.int8.onnx sense-voice.onnx
  }
  SenseVoice := ModelDir + 'sense-voice.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
    to download paraformer models.

    Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
    An example is given below for the rename:

      cp model.onnx paraformer.onnx

      // or use int8.onnx
      cp model.int8.onnx paraformer.onnx
  }
  Paraformer := ModelDir + 'paraformer.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
    to download TeleSpeech models.

    Note that you have to rename model files after downloading. The following
    is an example

      mv model.onnx telespeech.onnx

      // or to use int8.onnx

      mv model.int8.onnx telespeech.onnx
  }
  TeleSpeech := ModelDir + 'telespeech.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    to download an icefall offline transducer model. Note that you need to rename the
    model files to transducer-encoder.onnx, transducer-decoder.onnx, and
    transducer-joiner.onnx
  }
  TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
  TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
  TransducerJoiner := ModelDir + 'transducer-joiner.onnx';

  {
    Please visit
    https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    to download a NeMo transducer model.
  }
  NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
  NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
  NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';

  if not FileExists(VadFilename) then
  begin
    ShowMessage(VadFilename + ' does not exist! Please download it from' +
      sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'
    );
    Exit;
  end;

  { A previous attempt may have created a detector and then bailed out on one
    of the checks below; release it so that retrying does not leak it. }
  FreeAndNil(Self.Vad);
  Self.Vad := CreateVad(VadFilename);

  if not FileExists(Tokens) then
  begin
    ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!');
    Exit;
  end;

  { Use the first model kind whose files are all present. }
  if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
    Msg := 'Whisper';
  end
  else if FileExists(SenseVoice) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
    Msg := 'SenseVoice';
  end
  else if FileExists(Paraformer) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
    Msg := 'Paraformer';
  end
  else if FileExists(TeleSpeech) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
    Msg := 'TeleSpeech';
  end
  else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
      TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
    Msg := 'Zipformer transducer';
  end
  else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
      NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
    Msg := 'NeMo transducer';
  end
  else
  begin
    ShowMessage('Please download at least one non-streaming speech recognition model first.');
    Exit;
  end;

  MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0);

  { Initialization is a one-shot action: unlock file selection and lock Init. }
  FileNameEdt.Enabled := True;
  SelectFileBtn.Enabled := True;
  InitBtn.Enabled := False;
end;
|
|
|
|
end.
|
|
|