Add Lazarus example for generating subtitles using Silero VAD with non-streaming ASR (#1251)
This commit is contained in:
30
lazarus-examples/.gitignore
vendored
Normal file
30
lazarus-examples/.gitignore
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
# Lazarus compiler-generated binaries (safe to delete)
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
*.lrs
|
||||
*.res
|
||||
*.compiled
|
||||
*.dbg
|
||||
*.ppu
|
||||
*.o
|
||||
*.or
|
||||
*.a
|
||||
|
||||
# Lazarus autogenerated files (duplicated info)
|
||||
*.rst
|
||||
*.rsj
|
||||
*.lrt
|
||||
|
||||
# Lazarus local files (user-specific info)
|
||||
*.lps
|
||||
|
||||
# Lazarus backups and unit output folders.
|
||||
# These can be changed by user in Lazarus/project options.
|
||||
backup/
|
||||
*.bak
|
||||
lib/
|
||||
|
||||
# Application bundle for Mac OS
|
||||
*.app/
|
||||
3
lazarus-examples/generate_subtitles/.gitignore
vendored
Normal file
3
lazarus-examples/generate_subtitles/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
generate_subtitles.app
|
||||
generate_subtitles
|
||||
generate_subtitles.dSYM
|
||||
BIN
lazarus-examples/generate_subtitles/generate_subtitles.ico
Normal file
BIN
lazarus-examples/generate_subtitles/generate_subtitles.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 130 KiB |
208
lazarus-examples/generate_subtitles/generate_subtitles.lpi
Normal file
208
lazarus-examples/generate_subtitles/generate_subtitles.lpi
Normal file
@@ -0,0 +1,208 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CONFIG>
|
||||
<ProjectOptions>
|
||||
<Version Value="12"/>
|
||||
<PathDelim Value="\"/>
|
||||
<General>
|
||||
<SessionStorage Value="InProjectDir"/>
|
||||
<Title Value="generate_subtitles"/>
|
||||
<Scaled Value="True"/>
|
||||
<ResourceType Value="res"/>
|
||||
<UseXPManifest Value="True"/>
|
||||
<XPManifest>
|
||||
<DpiAware Value="True"/>
|
||||
</XPManifest>
|
||||
<Icon Value="0"/>
|
||||
</General>
|
||||
<BuildModes>
|
||||
<Item Name="Default" Default="True"/>
|
||||
<Item Name="Debug">
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="generate_subtitles"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/>
|
||||
<OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<Parsing>
|
||||
<SyntaxOptions>
|
||||
<IncludeAssertionCode Value="True"/>
|
||||
</SyntaxOptions>
|
||||
</Parsing>
|
||||
<CodeGeneration>
|
||||
<Checks>
|
||||
<IOChecks Value="True"/>
|
||||
<RangeChecks Value="True"/>
|
||||
<OverflowChecks Value="True"/>
|
||||
<StackChecks Value="True"/>
|
||||
</Checks>
|
||||
<VerifyObjMethodCallValidity Value="True"/>
|
||||
</CodeGeneration>
|
||||
<Linking>
|
||||
<Debugging>
|
||||
<DebugInfoType Value="dsDwarf3"/>
|
||||
<UseHeaptrc Value="True"/>
|
||||
<TrashVariables Value="True"/>
|
||||
<StripSymbols Value="True"/>
|
||||
<UseExternalDbgSyms Value="True"/>
|
||||
</Debugging>
|
||||
<Options>
|
||||
<Win32>
|
||||
<GraphicApplication Value="True"/>
|
||||
</Win32>
|
||||
</Options>
|
||||
</Linking>
|
||||
</CompilerOptions>
|
||||
</Item>
|
||||
<Item Name="Release">
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="generate_subtitles"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/>
|
||||
<OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<CodeGeneration>
|
||||
<SmartLinkUnit Value="True"/>
|
||||
<Optimizations>
|
||||
<OptimizationLevel Value="3"/>
|
||||
</Optimizations>
|
||||
</CodeGeneration>
|
||||
<Linking>
|
||||
<Debugging>
|
||||
<GenerateDebugInfo Value="False"/>
|
||||
<RunWithoutDebug Value="True"/>
|
||||
<StripSymbols Value="True"/>
|
||||
</Debugging>
|
||||
<LinkSmart Value="True"/>
|
||||
<Options>
|
||||
<Win32>
|
||||
<GraphicApplication Value="True"/>
|
||||
</Win32>
|
||||
</Options>
|
||||
</Linking>
|
||||
</CompilerOptions>
|
||||
</Item>
|
||||
<Item Name="Release-Linux">
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="generate_subtitles"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/>
|
||||
<OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<CodeGeneration>
|
||||
<SmartLinkUnit Value="True"/>
|
||||
<Optimizations>
|
||||
<OptimizationLevel Value="3"/>
|
||||
</Optimizations>
|
||||
</CodeGeneration>
|
||||
<Linking>
|
||||
<Debugging>
|
||||
<GenerateDebugInfo Value="False"/>
|
||||
<RunWithoutDebug Value="True"/>
|
||||
<StripSymbols Value="True"/>
|
||||
</Debugging>
|
||||
<LinkSmart Value="True"/>
|
||||
<Options>
|
||||
<Win32>
|
||||
<GraphicApplication Value="True"/>
|
||||
</Win32>
|
||||
</Options>
|
||||
</Linking>
|
||||
<Other>
|
||||
<CustomOptions Value="-dSHERPA_ONNX_USE_SHARED_LIBS"/>
|
||||
</Other>
|
||||
</CompilerOptions>
|
||||
</Item>
|
||||
</BuildModes>
|
||||
<PublishOptions>
|
||||
<Version Value="2"/>
|
||||
<UseFileFilters Value="True"/>
|
||||
</PublishOptions>
|
||||
<RunParams>
|
||||
<FormatVersion Value="2"/>
|
||||
</RunParams>
|
||||
<RequiredPackages>
|
||||
<Item>
|
||||
<PackageName Value="LCL"/>
|
||||
</Item>
|
||||
</RequiredPackages>
|
||||
<Units>
|
||||
<Unit>
|
||||
<Filename Value="generate_subtitles.lpr"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
</Unit>
|
||||
<Unit>
|
||||
<Filename Value="unit1.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<ComponentName Value="Form1"/>
|
||||
<HasResources Value="True"/>
|
||||
<ResourceBaseClass Value="Form"/>
|
||||
<UnitName Value="Unit1"/>
|
||||
</Unit>
|
||||
<Unit>
|
||||
<Filename Value="my_worker.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
</Unit>
|
||||
</Units>
|
||||
</ProjectOptions>
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="generate_subtitles"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<Libraries Value="..\..\build-static\install\lib;..\..\build\install\lib"/>
|
||||
<OtherUnitFiles Value="..\..\sherpa-onnx\pascal-api"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<CodeGeneration>
|
||||
<Optimizations>
|
||||
<OptimizationLevel Value="2"/>
|
||||
</Optimizations>
|
||||
</CodeGeneration>
|
||||
<Linking>
|
||||
<Debugging>
|
||||
<GenerateDebugInfo Value="False"/>
|
||||
<DebugInfoType Value="dsDwarf3"/>
|
||||
<StripSymbols Value="True"/>
|
||||
</Debugging>
|
||||
<Options>
|
||||
<Win32>
|
||||
<GraphicApplication Value="True"/>
|
||||
</Win32>
|
||||
</Options>
|
||||
</Linking>
|
||||
</CompilerOptions>
|
||||
<Debugging>
|
||||
<Exceptions>
|
||||
<Item>
|
||||
<Name Value="EAbort"/>
|
||||
</Item>
|
||||
<Item>
|
||||
<Name Value="ECodetoolError"/>
|
||||
</Item>
|
||||
<Item>
|
||||
<Name Value="EFOpenError"/>
|
||||
</Item>
|
||||
</Exceptions>
|
||||
</Debugging>
|
||||
</CONFIG>
|
||||
26
lazarus-examples/generate_subtitles/generate_subtitles.lpr
Normal file
26
lazarus-examples/generate_subtitles/generate_subtitles.lpr
Normal file
@@ -0,0 +1,26 @@
|
||||
program generate_subtitles;

{$mode objfpc}
{$H+}

{
  Entry point of the subtitle generator GUI example.
  It creates the single main form (TForm1 from unit1) and hands control to
  the LCL message loop.
}

uses
  {$IFDEF UNIX}
  cthreads,   // listed first on Unix because my_worker runs a TThread
  cmem,
  {$ENDIF}
  {$IFDEF HASAMIGA}
  athreads,
  {$ENDIF}
  Interfaces, // this includes the LCL widgetset
  Forms,
  unit1,
  my_worker
  { you can add units after this };

{$R *.res}

begin
  RequireDerivedFormResource := True;
  Application.Scaled := True;
  Application.Initialize;
  Application.CreateForm(TForm1, Form1);
  Application.Run;
end.
|
||||
|
||||
160
lazarus-examples/generate_subtitles/my_worker.pas
Normal file
160
lazarus-examples/generate_subtitles/my_worker.pas
Normal file
@@ -0,0 +1,160 @@
|
||||
unit my_worker;

{$mode ObjFPC}{$H+}

{
See
https://wiki.lazarus.freepascal.org/Multithreaded_Application_Tutorial

https://www.freepascal.org/docs-html/rtl/classes/tthread.html
}

interface

uses
  {$IFDEF UNIX}
  cthreads,
  cmem,
  {$ENDIF}
  {$IFDEF HASAMIGA}
  athreads,
  {$ENDIF}
  Classes, SysUtils;

type
  {
    Background thread that turns one wave file into timed text lines.

    The samples are fed to Form1.Vad one window at a time; every speech
    segment the detector reports is decoded with Form1.OfflineRecognizer and
    pushed to the form through Synchronize, so all GUI access happens on the
    main thread.

    The thread is created with FreeOnTerminate = True, i.e. it destroys
    itself when Execute returns. To keep the global MyWorkerThread variable
    from pointing at a destroyed object, Execute clears that variable (on the
    main thread) right before it returns.
  }
  TMyWorkerThread = class(TThread)
  private
    { Values handed over to the form by ShowStatus / ShowProgress. }
    Status: AnsiString;
    StartTime: Single;      // start of the last decoded segment, in seconds
    StopTime: Single;       // end of the last segment / current position, in seconds
    TotalDuration: Single;  // length of the whole file, in seconds
    procedure ShowStatus;
    procedure ShowProgress;
    procedure ForgetGlobalReference;
  protected
    procedure Execute; override;
  public
    WaveFilename: AnsiString;
    Constructor Create(CreateSuspended : boolean; Filename: AnsiString);
  end;

var
  { The worker started most recently by the form, or nil when none is alive. }
  MyWorkerThread: TMyWorkerThread;

implementation

uses
  unit1, sherpa_onnx;

{
  Creates the worker for the given wave file.

  CreateSuspended is forwarded to TThread.Create; Filename is stored in
  WaveFilename and read by Execute.
}
constructor TMyWorkerThread.Create(CreateSuspended : boolean; Filename: AnsiString);
begin
  inherited Create(CreateSuspended);
  WaveFilename := Filename;
  FreeOnTerminate := True;
end;

{ Runs on the main thread (via Synchronize): publishes the current status line. }
procedure TMyWorkerThread.ShowStatus;
begin
  Form1.UpdateResult(Status, StartTime, StopTime, TotalDuration);
end;

{ Runs on the main thread (via Synchronize): publishes the current position. }
procedure TMyWorkerThread.ShowProgress;
begin
  Form1.UpdateProgress(StopTime, TotalDuration);
end;

{
  Runs on the main thread (via Synchronize) as the very last step of Execute.

  Because FreeOnTerminate is True this object is destroyed shortly after
  Execute returns; resetting the global here means the form's
  "MyWorkerThread <> nil" checks never look at a freed object. The comparison
  with Self keeps a newer worker's reference intact.
}
procedure TMyWorkerThread.ForgetGlobalReference;
begin
  if MyWorkerThread = Self then
    MyWorkerThread := nil;
end;

{
  Thread body: read the wave file, run VAD + recognition, report the results.

  Every exit path (unreadable file, wrong sample rate, cancellation, normal
  completion, exception) ends with ForgetGlobalReference.
}
procedure TMyWorkerThread.Execute;
var
  Wave: TSherpaOnnxWave;
  WindowSize: Integer;
  Offset: Integer;

  {
    Decodes every speech segment currently queued in Form1.Vad and reports
    each one through ShowStatus. Stops early when the thread is terminated.
  }
  procedure DecodePendingSegments;
  var
    SpeechSegment: TSherpaOnnxSpeechSegment;
    Stream: TSherpaOnnxOfflineStream;
    RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
    Duration: Single;
  begin
    while not Terminated and not Form1.Vad.IsEmpty do
    begin
      SpeechSegment := Form1.Vad.Front;
      Form1.Vad.Pop;

      Stream := Form1.OfflineRecognizer.CreateStream;
      try
        Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
        Form1.OfflineRecognizer.Decode(Stream);
        RecognitionResult := Form1.OfflineRecognizer.GetResult(Stream);

        StartTime := SpeechSegment.Start / Wave.SampleRate;
        Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
        StopTime := StartTime + Duration;
        Status := RecognitionResult.Text;

        Synchronize(@ShowStatus);
      finally
        // Release the stream even when decoding or reporting raises.
        FreeAndNil(Stream);
      end;
    end;
  end;

begin
  try
    Wave := SherpaOnnxReadWave(WaveFilename);
    TotalDuration := 0;
    StartTime := 0;
    StopTime := 0;

    if Length(Wave.Samples) = 0 then
    begin
      Status := Format('Failed to read %s. We only support 1 channel, 16000Hz, 16-bit encoded wave files',
        [WaveFilename]);
      Synchronize(@ShowStatus);
      Exit;
    end;

    if Wave.SampleRate <> 16000 then
    begin
      Status := Format('Expected sample rate 16000. Given %d. Please select a new file', [Wave.SampleRate]);
      Synchronize(@ShowStatus);
      Exit;
    end;

    TotalDuration := Length(Wave.Samples) / Wave.SampleRate;
    WindowSize := Form1.Vad.Config.SileroVad.WindowSize;

    Offset := 0;
    Form1.Vad.Reset;

    // Feed the detector one full window at a time; a trailing partial window
    // is not fed.
    while not Terminated and (Offset + WindowSize <= Length(Wave.Samples)) do
    begin
      Form1.Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;
      StopTime := Offset / Wave.SampleRate;

      // Refresh the progress bar only every 20480 samples to limit the
      // number of round trips to the main thread.
      if (Offset mod 20480) = 0 then
        Synchronize(@ShowProgress);

      DecodePendingSegments;
    end;

    // Flush the detector and decode whatever segments it still reports.
    Form1.Vad.Flush;
    DecodePendingSegments;

    if Terminated then
      Status := 'Cancelled!'
    else
      Status := 'DONE!';

    Synchronize(@ShowStatus);
  finally
    Synchronize(@ForgetGlobalReference);
  end;
end;

end.
|
||||
|
||||
74
lazarus-examples/generate_subtitles/unit1.lfm
Normal file
74
lazarus-examples/generate_subtitles/unit1.lfm
Normal file
@@ -0,0 +1,74 @@
|
||||
object Form1: TForm1
|
||||
Left = 366
|
||||
Height = 623
|
||||
Top = 117
|
||||
Width = 852
|
||||
Caption = 'Next-gen Kaldi: Generate Subtitles'
|
||||
ClientHeight = 623
|
||||
ClientWidth = 852
|
||||
OnClose = FormClose
|
||||
OnCreate = FormCreate
|
||||
LCLVersion = '3.4.0.0'
|
||||
object FileNameEdt: TEdit
|
||||
Left = 200
|
||||
Height = 22
|
||||
Top = 40
|
||||
Width = 440
|
||||
TabOrder = 0
|
||||
OnChange = FileNameEdtChange
|
||||
end
|
||||
object SelectFileBtn: TButton
|
||||
Left = 96
|
||||
Height = 25
|
||||
Top = 40
|
||||
Width = 97
|
||||
Caption = 'Select a file...'
|
||||
TabOrder = 1
|
||||
OnClick = SelectFileBtnClick
|
||||
end
|
||||
object StartBtn: TButton
|
||||
Left = 656
|
||||
Height = 25
|
||||
Top = 37
|
||||
Width = 75
|
||||
Caption = 'Start'
|
||||
TabOrder = 2
|
||||
OnClick = StartBtnClick
|
||||
end
|
||||
object InitBtn: TButton
|
||||
Left = 280
|
||||
Height = 25
|
||||
Top = 8
|
||||
Width = 280
|
||||
Caption = 'Click me to initialize models before you start'
|
||||
TabOrder = 3
|
||||
OnClick = InitBtnClick
|
||||
end
|
||||
object ResultMemo: TMemo
|
||||
Left = 24
|
||||
Height = 488
|
||||
Top = 72
|
||||
Width = 800
|
||||
ScrollBars = ssAutoBoth
|
||||
TabOrder = 4
|
||||
end
|
||||
object ProgressBar: TProgressBar
|
||||
Left = 32
|
||||
Height = 16
|
||||
Top = 592
|
||||
Width = 792
|
||||
TabOrder = 5
|
||||
end
|
||||
object ProgressLabel: TLabel
|
||||
Left = 770
|
||||
Height = 16
|
||||
Top = 568
|
||||
Width = 8
|
||||
Caption = '0'
|
||||
end
|
||||
object SelectFileDlg: TOpenDialog
|
||||
Title = 'Open a wave file'
|
||||
Left = 600
|
||||
Top = 488
|
||||
end
|
||||
end
|
||||
502
lazarus-examples/generate_subtitles/unit1.pas
Normal file
502
lazarus-examples/generate_subtitles/unit1.pas
Normal file
@@ -0,0 +1,502 @@
|
||||
unit Unit1;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
{$IFDEF DARWIN}
|
||||
{$modeswitch objectivec1} {For getting resource directory}
|
||||
{$ENDIF}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils, StrUtils, Forms, Controls,
|
||||
Graphics, Dialogs, StdCtrls,
|
||||
sherpa_onnx, ComCtrls;
|
||||
|
||||
type
|
||||
|
||||
{ TForm1 }
|
||||
|
||||
TForm1 = class(TForm)
  { Controls and dialog defined in unit1.lfm }
  InitBtn: TButton;
  SelectFileBtn: TButton;
  FileNameEdt: TEdit;
  StartBtn: TButton;
  ResultMemo: TMemo;
  ProgressBar: TProgressBar;
  ProgressLabel: TLabel;
  SelectFileDlg: TOpenDialog;

  { Event handlers wired up in unit1.lfm }
  procedure FormCreate(Sender: TObject);
  procedure FormClose(Sender: TObject; var CloseAction: TCloseAction);
  procedure InitBtnClick(Sender: TObject);
  procedure SelectFileBtnClick(Sender: TObject);
  procedure FileNameEdtChange(Sender: TObject);
  procedure StartBtnClick(Sender: TObject);
private

public
  { Appends one result or status line to ResultMemo and refreshes the
    progress display. Called by the worker thread through Synchronize. }
  procedure UpdateResult(
    Msg: AnsiString;
    StartTime: Single;
    StopTime: Single;
    TotalDuration: Single);

  { Shows StopTime / TotalDuration as a percentage in the progress bar and
    its label. }
  procedure UpdateProgress(StopTime: Single; TotalDuration: Single);

  { Model objects created by InitBtnClick, used by the worker thread and
    released in FormClose. }
  Vad: TSherpaOnnxVoiceActivityDetector;
  OfflineRecognizer: TSherpaOnnxOfflineRecognizer;
end;
|
||||
|
||||
var
|
||||
Form1: TForm1;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
my_worker
|
||||
{$IFDEF DARWIN}
|
||||
,MacOSAll
|
||||
,CocoaAll
|
||||
{$ENDIF}
|
||||
;
|
||||
{See https://wiki.lazarus.freepascal.org/Locating_the_macOS_application_resources_directory}
|
||||
|
||||
{$IFDEF DARWIN}
|
||||
{Note: The returned path contains a trailing /}
|
||||
{
  Returns the Resources directory of the running macOS application bundle,
  including a trailing path delimiter.

  The path is taken from NSBundle.mainBundle.resourcePath and read as UTF-8,
  so it is neither limited to the 255 bytes of a shortstring nor converted
  through the CoreFoundation "system encoding".

  Raises Exception when the bundle reports no resource path.
}
function GetResourcesPath(): AnsiString;
var
  ResourceDir: NSString;
begin
  ResourceDir := NSBundle.mainBundle.resourcePath;

  if ResourceDir = nil then
    raise Exception.Create('Error in GetResourcesPath()');

  // UTF8String yields a NUL-terminated C string; the assignment copies it.
  Result := ResourceDir.UTF8String;
  Result := Result + PathDelim;
end;
|
||||
{$ENDIF}
|
||||
|
||||
{
  Builds a Silero VAD detector from the model file VadFilename.

  The caller owns the returned object and is responsible for freeing it.
}
function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
const
  {Please don't change these two unless you know the details}
  ExpectedSampleRate = 16000;
  SamplesPerWindow = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  VadConfig.SampleRate := ExpectedSampleRate;
  VadConfig.NumThreads := 2;
  VadConfig.Provider := 'cpu';
  VadConfig.Debug := True;

  VadConfig.SileroVad.Model := VadFilename;
  VadConfig.SileroVad.WindowSize := SamplesPerWindow;
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;

  // NOTE(review): the second argument is presumably the detector's internal
  // buffer length in seconds - confirm against sherpa_onnx.pas.
  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
|
||||
|
||||
{
  Fills in the settings that every recognizer created in this unit shares:
  the tokens file, CPU execution, two threads and no debug output.
}
procedure ApplyCommonModelOptions(
  var Config: TSherpaOnnxOfflineRecognizerConfig;
  const Tokens: AnsiString);
begin
  Config.ModelConfig.Tokens := Tokens;
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 2;
  Config.ModelConfig.Debug := False;
end;

{
  Creates a non-streaming transducer recognizer.

  Tokens, Encoder, Decoder and Joiner are file names; ModelType is passed
  through to the model configuration unchanged (InitBtnClick uses
  'transducer' and 'nemo_transducer'). The caller owns the result.
}
function CreateOfflineRecognizerTransducer(
  Tokens: AnsiString;
  Encoder: AnsiString;
  Decoder: AnsiString;
  Joiner: AnsiString;
  ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Transducer.Encoder := Encoder;
  Config.ModelConfig.Transducer.Decoder := Decoder;
  Config.ModelConfig.Transducer.Joiner := Joiner;
  Config.ModelConfig.ModelType := ModelType;
  ApplyCommonModelOptions(Config, Tokens);

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

{
  Creates a non-streaming TeleSpeech CTC recognizer from the model file
  TeleSpeech and the tokens file Tokens. The caller owns the result.
}
function CreateOfflineRecognizerTeleSpeech(
  Tokens: AnsiString;
  TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.TeleSpeechCtc := TeleSpeech;
  ApplyCommonModelOptions(Config, Tokens);

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

{
  Creates a non-streaming Paraformer recognizer from the model file
  Paraformer and the tokens file Tokens. The caller owns the result.
}
function CreateOfflineRecognizerParaformer(
  Tokens: AnsiString;
  Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Paraformer.Model := Paraformer;
  ApplyCommonModelOptions(Config, Tokens);

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

{
  Creates a non-streaming SenseVoice recognizer from the model file
  SenseVoice and the tokens file Tokens, with language 'auto' and UseItn
  enabled. The caller owns the result.
}
function CreateOfflineRecognizerSenseVoice(
  Tokens: AnsiString;
  SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.SenseVoice.Model := SenseVoice;
  Config.ModelConfig.SenseVoice.Language := 'auto';
  Config.ModelConfig.SenseVoice.UseItn := True;
  ApplyCommonModelOptions(Config, Tokens);

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

{
  Creates a non-streaming Whisper recognizer from the encoder and decoder
  model files and the tokens file Tokens. The caller owns the result.
}
function CreateOfflineRecognizerWhisper(
  Tokens: AnsiString;
  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Whisper.Encoder := WhisperEncoder;
  Config.ModelConfig.Whisper.Decoder := WhisperDecoder;
  ApplyCommonModelOptions(Config, Tokens);

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;
|
||||
|
||||
{$R *.lfm}
|
||||
|
||||
{ TForm1 }
|
||||
|
||||
{
  Puts the form into its initial state: everything except the init button is
  disabled until the models are loaded, the usage notes are shown and the
  progress display is cleared.
}
procedure TForm1.FormCreate(Sender: TObject);
begin
  StartBtn.Enabled := False;
  FileNameEdt.Enabled := False;
  SelectFileBtn.Enabled := False;

  // The pattern only matches *.wav, so the label says so instead of
  // "All Files".
  SelectFileDlg.Filter := 'Wave files (*.wav)|*.wav';

  ResultMemo.Lines.Add('1. It supports only 1 channel, 16-bit, 16000Hz wav files');
  ResultMemo.Lines.Add('2. There should be no Chinese characters in the file path.');

  ProgressBar.Position := 0;
  ProgressLabel.Caption := '';
end;
|
||||
|
||||
{
  Start/Stop toggle.

  While the caption is 'Stop', a click asks the running worker to terminate.
  The caption is deliberately NOT switched back to 'Start' here: the worker
  keeps using Vad and OfflineRecognizer until it notices the request, and
  UpdateResult restores the caption once the worker has reported its final
  status. Switching immediately would let a second worker start while the
  first one is still running on the same model objects.

  While the caption is 'Start', a click clears the output and launches a new
  worker for the file named in FileNameEdt.
}
procedure TForm1.StartBtnClick(Sender: TObject);
begin
  if StartBtn.Caption = 'Stop' then
  begin
    if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then
      MyWorkerThread.Terminate
    else
      // Nothing is running any more, so just make the button usable again.
      StartBtn.Caption := 'Start';

    Exit;
  end;

  ResultMemo.Lines.Clear();
  ResultMemo.Lines.Add('Start processing');

  ProgressBar.Position := 0;
  ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]);

  MyWorkerThread := TMyWorkerThread.Create(False, FileNameEdt.Text);

  StartBtn.Caption := 'Stop';
end;
|
||||
|
||||
{ Opens the file dialog and copies the chosen file name into FileNameEdt.
  Does nothing when the dialog is cancelled. }
procedure TForm1.SelectFileBtnClick(Sender: TObject);
begin
  if not SelectFileDlg.Execute then
    Exit;

  FileNameEdt.Text := SelectFileDlg.FileName;
end;
|
||||
|
||||
{ Enables the start button only while the edit box names an existing file. }
procedure TForm1.FileNameEdtChange(Sender: TObject);
begin
  StartBtn.Enabled := FileExists(FileNameEdt.Text);
end;
|
||||
|
||||
{
  Stops a running worker, waits for it to finish and then releases the model
  objects it was using.

  The worker is created with FreeOnTerminate = True, and waiting on a thread
  that may destroy itself during the wait is unsafe. The form therefore takes
  ownership first (FreeOnTerminate := False), waits, and frees the thread
  object itself.
}
procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
var
  Worker: TMyWorkerThread;
begin
  // Work on a local copy: the global may be reset while we are waiting.
  Worker := MyWorkerThread;

  if (Worker <> nil) and not Worker.Finished then
  begin
    Worker.FreeOnTerminate := False;
    Worker.Terminate;
    Worker.WaitFor;
    MyWorkerThread := nil;
    Worker.Free;
  end;

  // Only now is it safe to destroy what the worker was reading from.
  FreeAndNil(Vad);
  FreeAndNil(OfflineRecognizer);
end;
|
||||
|
||||
{
  Displays StopTime / TotalDuration as a rounded percentage in ProgressBar
  and ProgressLabel. Leaves the display untouched when either value is 0.
}
procedure TForm1.UpdateProgress(StopTime: Single; TotalDuration: Single);
var
  Ratio: Single;
begin
  if (StopTime = 0) or (TotalDuration = 0) then
    Exit;

  Ratio := StopTime / TotalDuration * 100;
  ProgressBar.Position := Round(Ratio);
  ProgressLabel.Caption := Format('%d%%', [ProgressBar.Position]);
end;
|
||||
|
||||
{
  Adds one line to ResultMemo and refreshes the progress display.

  Msg is either a recognized text or one of the worker's final status
  messages ('DONE!', 'Cancelled!', or one of the two file errors, which are
  recognized by their endings). A final message is shown verbatim and
  switches the start button back to 'Start'; any other message is shown
  prefixed with its start and stop time in seconds.

  The method works on its own controls rather than on the global Form1
  variable, so it does not depend on which variable holds the instance.
}
procedure TForm1.UpdateResult(
  Msg: AnsiString;
  StartTime: Single;
  StopTime: Single;
  TotalDuration: Single);
var
  IsFinalMessage: Boolean;
  NewResult: AnsiString;
begin
  UpdateProgress(StopTime, TotalDuration);

  IsFinalMessage :=
    (Msg = 'DONE!') or
    (Msg = 'Cancelled!') or
    EndsStr('16-bit encoded wave files', Msg) or
    EndsStr('. Please select a new file', Msg);

  if IsFinalMessage then
  begin
    StartBtn.Caption := 'Start';
    NewResult := Msg;
  end
  else
    NewResult := Format('%.3f -- %.3f %s', [StartTime, StopTime, Msg]);

  if Msg = 'DONE!' then
  begin
    // The last partial window is never fed to the VAD, so force 100 %.
    ProgressBar.Position := 100;
    ProgressLabel.Caption := '100%';
  end;

  ResultMemo.Lines.Add(NewResult);
end;
|
||||
|
||||
{
  Loads the VAD model and the first non-streaming ASR model found in the
  model directory, then unlocks the file selection controls.

  The model directory is the application bundle's Resources directory on
  macOS and './' elsewhere. All required files are checked BEFORE any model
  object is created, so a failed attempt leaves nothing allocated and the
  button can simply be clicked again after the missing files were added.

  Models are probed in this order: Whisper, SenseVoice, Paraformer,
  TeleSpeech, Zipformer transducer, NeMo transducer.
}
procedure TForm1.InitBtnClick(Sender: TObject);
var
  Msg: AnsiString;
  ModelDir: AnsiString;
  VadFilename: AnsiString;
  Tokens: AnsiString;

  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString;

  SenseVoice: AnsiString;

  Paraformer: AnsiString;

  TeleSpeech: AnsiString;

  TransducerEncoder: AnsiString; // from icefall
  TransducerDecoder: AnsiString;
  TransducerJoiner: AnsiString;

  NeMoTransducerEncoder: AnsiString;
  NeMoTransducerDecoder: AnsiString;
  NeMoTransducerJoiner: AnsiString;
begin
  {$IFDEF DARWIN}
  ModelDir := GetResourcesPath;
  {$ELSE}
  ModelDir := './';
  {$ENDIF}

  VadFilename := ModelDir + 'silero_vad.onnx';
  Tokens := ModelDir + 'tokens.txt';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
    for a list of whisper models.

    In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
    You need to rename the existing model files.

    For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
      mv tiny.en-tokens.txt tokens.txt

      mv tiny.en-encoder.onnx whisper-encoder.onnx
      mv tiny.en-decoder.onnx whisper-decoder.onnx

      // or use the int8.onnx

      mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
      mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
  }
  WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
  WhisperDecoder := ModelDir + 'whisper-decoder.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
    to download models for SenseVoice.

    In the code, we use the normalized model name sense-voice.onnx. You have
    to rename the downloaded model files.

    For example, you need to use

      mv model.onnx sense-voice.onnx

      // or use the int8.onnx
      mv model.int8.onnx sense-voice.onnx
  }
  SenseVoice := ModelDir + 'sense-voice.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
    to download paraformer models.

    Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
    An example is given below for the rename:

      cp model.onnx paraformer.onnx

      // or use int8.onnx
      cp model.int8.onnx paraformer.onnx
  }
  Paraformer := ModelDir + 'paraformer.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
    to download TeleSpeech models.

    Note that you have to rename model files after downloading. The following
    is an example

      mv model.onnx telespeech.onnx

      // or to use int8.onnx

      mv model.int8.onnx telespeech.onnx
  }
  TeleSpeech := ModelDir + 'telespeech.onnx';

  {
    Please refer to
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    to download an icefall offline transducer model. Note that you need to rename the
    model files to transducer-encoder.onnx, transducer-decoder.onnx, and
    transducer-joiner.onnx
  }
  TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
  TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
  TransducerJoiner := ModelDir + 'transducer-joiner.onnx';

  {
    Please visit
    https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    to download a NeMo transducer model.
  }
  NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
  NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
  NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';

  // Validate the two files every configuration needs before creating anything.
  if not FileExists(VadFilename) then
  begin
    ShowMessage(VadFilename + ' does not exist! Please download it from' +
      sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'
      );
    Exit;
  end;

  if not FileExists(Tokens) then
  begin
    ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!');
    Exit;
  end;

  // Use the first model family whose files are all present.
  if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
    Msg := 'Whisper';
  end
  else if FileExists(SenseVoice) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
    Msg := 'SenseVoice';
  end
  else if FileExists(Paraformer) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
    Msg := 'Paraformer';
  end
  else if FileExists(TeleSpeech) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
    Msg := 'TeleSpeech';
  end
  else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
      TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
    Msg := 'Zipformer transducer';
  end
  else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
  begin
    OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
      NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
    Msg := 'NeMo transducer';
  end
  else
  begin
    ShowMessage('Please download at least one non-streaming speech recognition model first.');
    Exit;
  end;

  // Created last: every earlier Exit therefore leaves no detector behind
  // that a later click would overwrite.
  Self.Vad := CreateVad(VadFilename);

  MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0);
  FileNameEdt.Enabled := True;
  SelectFileBtn.Enabled := True;
  InitBtn.Enabled := False;
end;
|
||||
|
||||
end.
|
||||
|
||||
Reference in New Issue
Block a user