Pascal API for VAD (#1249)
This commit is contained in:
@@ -95,6 +95,8 @@ void CircularBuffer::Push(const float *p, int32_t n) {
|
||||
"capacity to: %d",
|
||||
n, size, n + size, capacity, new_capacity);
|
||||
Resize(new_capacity);
|
||||
|
||||
capacity = new_capacity;
|
||||
}
|
||||
|
||||
int32_t start = tail_ % capacity;
|
||||
|
||||
@@ -2,9 +2,11 @@
|
||||
|
||||
unit sherpa_onnx;
|
||||
|
||||
{$mode objfpc}
|
||||
{$IFDEF FPC}
|
||||
{$mode objfpc}
|
||||
{$modeSwitch advancedRecords} { to support records with methods }
|
||||
{$ENDIF}
|
||||
|
||||
{$modeSwitch advancedRecords} { to support records with methods }
|
||||
(* {$LongStrings ON} *)
|
||||
|
||||
interface
|
||||
@@ -45,18 +47,21 @@ type
|
||||
ModelingUnit: AnsiString;
|
||||
BpeVocab: AnsiString;
|
||||
function ToString: AnsiString;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
|
||||
end;
|
||||
|
||||
{ Feature-extraction options shared by the recognizers. }
TSherpaOnnxFeatureConfig = record
  { Sample rate; the Initialize operator sets it to 16000. }
  SampleRate: Integer;
  { Feature dimension; the Initialize operator sets it to 80. }
  FeatureDim: Integer;

  { Management operator that fills in the defaults listed above. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig);
  { Human-readable dump of the record. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Options for the online CTC FST decoder. }
TSherpaOnnxOnlineCtcFstDecoderConfig = record
  { NOTE(review): presumably the path of the decoding graph - confirm. }
  Graph: AnsiString;
  { The Initialize operator sets this to 3000. }
  MaxActive: Integer;

  { Management operator that fills in the default for MaxActive. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig);
  { Human-readable dump of the record. }
  function ToString: AnsiString;
end;
|
||||
|
||||
TSherpaOnnxOnlineRecognizerConfig = record
|
||||
@@ -75,6 +80,7 @@ type
|
||||
RuleFars: AnsiString;
|
||||
BlankPenalty: Single;
|
||||
function ToString: AnsiString;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig);
|
||||
end;
|
||||
|
||||
TSherpaOnnxOnlineRecognizerResult = record
|
||||
@@ -97,6 +103,7 @@ type
|
||||
TSherpaOnnxOnlineRecognizer = class
|
||||
private
|
||||
Handle: Pointer;
|
||||
_Config: TSherpaOnnxOnlineRecognizerConfig;
|
||||
public
|
||||
constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
|
||||
destructor Destroy; override;
|
||||
@@ -108,6 +115,7 @@ type
|
||||
procedure Reset(Stream: TSherpaOnnxOnlineStream);
|
||||
function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
|
||||
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
|
||||
property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
|
||||
end;
|
||||
|
||||
TSherpaOnnxOfflineTransducerModelConfig = record
|
||||
@@ -134,6 +142,7 @@ type
|
||||
Task: AnsiString;
|
||||
TailPaddings: Integer;
|
||||
function ToString: AnsiString;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
|
||||
end;
|
||||
|
||||
TSherpaOnnxOfflineTdnnModelConfig = record
|
||||
@@ -145,12 +154,14 @@ type
|
||||
Model: AnsiString;
|
||||
Scale: Single;
|
||||
function ToString: AnsiString;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
|
||||
end;
|
||||
|
||||
{ Options for the offline SenseVoice model. }
TSherpaOnnxOfflineSenseVoiceModelConfig = record
  Model: AnsiString;
  Language: AnsiString;
  { The Initialize operator sets this to True. }
  UseItn: Boolean;

  { Human-readable dump of the record. }
  function ToString: AnsiString;
  { Management operator that fills in the default for UseItn. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig);
end;
|
||||
|
||||
@@ -169,6 +180,7 @@ type
|
||||
BpeVocab: AnsiString;
|
||||
TeleSpeechCtc: AnsiString;
|
||||
SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
||||
function ToString: AnsiString;
|
||||
end;
|
||||
|
||||
@@ -183,6 +195,7 @@ type
|
||||
RuleFsts: AnsiString;
|
||||
RuleFars: AnsiString;
|
||||
BlankPenalty: Single;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig);
|
||||
function ToString: AnsiString;
|
||||
end;
|
||||
|
||||
@@ -205,18 +218,83 @@ type
|
||||
{ Object wrapper around a native offline recognizer handle. }
TSherpaOnnxOfflineRecognizer = class
private
  { Opaque pointer returned by the native library. }
  Handle: Pointer;
  { Copy of the configuration passed to the constructor. }
  _Config: TSherpaOnnxOfflineRecognizerConfig;
public
  constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig);
  destructor Destroy; override;

  function CreateStream: TSherpaOnnxOfflineStream;
  procedure Decode(Stream: TSherpaOnnxOfflineStream);
  function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;

  { Read-only access to the configuration stored by the constructor. }
  property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
end;
|
||||
|
||||
{ It supports reading a single channel wave with 16-bit encoded samples.
|
||||
Samples are normalized to the range [-1, 1].
|
||||
}
|
||||
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
|
||||
{ Options for the Silero VAD model.
  The defaults quoted below are the ones assigned by the Initialize operator. }
TSherpaOnnxSileroVadModelConfig = record
  { NOTE(review): presumably the path of the model file - confirm. }
  Model: AnsiString;
  { Default 0.5. }
  Threshold: Single;
  { Default 0.5. }
  MinSilenceDuration: Single;
  { Default 0.25. }
  MinSpeechDuration: Single;
  { Default 512. }
  WindowSize: Integer;

  { Management operator that fills in the defaults. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
  { Human-readable dump of the record. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Top-level voice-activity-detector configuration.
  The defaults quoted below are the ones assigned by the Initialize operator. }
TSherpaOnnxVadModelConfig = record
  { Nested Silero VAD options. }
  SileroVad: TSherpaOnnxSileroVadModelConfig;
  { Default 16000. }
  SampleRate: Integer;
  { Default 1. }
  NumThreads: Integer;
  { Default 'cpu'. }
  Provider: AnsiString;
  { Default False. }
  Debug: Boolean;

  { Management operator that fills in the defaults. }
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
  { Human-readable dump of the record, including the nested SileroVad record. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Named dynamic array of samples; this is the return type of
  TSherpaOnnxCircularBuffer.Get. }
TSherpaOnnxSamplesArray = array of Single;
|
||||
|
||||
{ Object wrapper around a native circular buffer of samples. }
TSherpaOnnxCircularBuffer = class
private
  { Opaque pointer returned by SherpaOnnxCreateCircularBuffer. }
  Handle: Pointer;
public
  constructor Create(Capacity: Integer);
  destructor Destroy; override;

  { Forwards all of Samples to the native buffer. }
  procedure Push(Samples: array of Single);
  { Copies N samples fetched from the native buffer, beginning at
    StartIndex, into a newly allocated Pascal array. }
  function Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
  { Forwards N to the native pop call. }
  procedure Pop(N: Integer);
  procedure Reset;

  { Value reported by the native size call. }
  function Size: Integer;
  { Value reported by the native head call. }
  function Head: Integer;
end;
|
||||
|
||||
{ One speech segment as returned by TSherpaOnnxVoiceActivityDetector.Front. }
TSherpaOnnxSpeechSegment = record
  { Samples copied out of the native segment. Declared with the named type
    TSherpaOnnxSamplesArray (instead of an anonymous "array of Single") so
    the field can be assigned directly to variables of that type, e.g. the
    ones that hold the result of TSherpaOnnxCircularBuffer.Get. }
  Samples: TSherpaOnnxSamplesArray;
  { Start value copied from the native segment.
    NOTE(review): presumably a sample index - confirm against the C API. }
  Start: Integer;
end;
|
||||
|
||||
{ Object wrapper around a native voice activity detector. }
TSherpaOnnxVoiceActivityDetector = class
private
  { Opaque pointer returned by SherpaOnnxCreateVoiceActivityDetector. }
  Handle: Pointer;
  { Copy of the configuration passed to the constructor. }
  _Config: TSherpaOnnxVadModelConfig;
public
  constructor Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
  destructor Destroy; override;

  { Feeds the whole array to the detector. }
  procedure AcceptWaveform(Samples: array of Single); overload;
  { Feeds N samples beginning at index Offset to the detector. }
  procedure AcceptWaveform(Samples: array of Single; Offset: Integer; N: Integer); overload;

  { True when the native "empty" query returns 1. }
  function IsEmpty: Boolean;
  { True when the native "detected" query returns 1. }
  function IsDetected: Boolean;

  { Returns a Pascal copy of the segment reported by the native "front" call. }
  function Front: TSherpaOnnxSpeechSegment;
  procedure Pop;
  procedure Clear;
  procedure Reset;
  procedure Flush;

  { Read-only access to the configuration stored by the constructor. }
  property Config: TSherpaOnnxVadModelConfig Read _Config;
end;
|
||||
|
||||
{ Reads a wave file. Only single-channel files with 16-bit encoded samples
  are supported. The samples are normalized to the range [-1, 1]. }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;

{ Passes Samples, SampleRate and Filename to the native SherpaOnnxWriteWave
  function. Returns True when that call returns 1. }
function SherpaOnnxWriteWave(Filename: AnsiString;
  Samples: array of Single; SampleRate: Integer): Boolean;
|
||||
|
||||
implementation
|
||||
|
||||
@@ -294,15 +372,15 @@ type
|
||||
DecodingMethod: PAnsiChar;
|
||||
MaxActivePaths: cint32;
|
||||
EnableEndpoint: cint32;
|
||||
Rule1MinTrailingSilence: Single;
|
||||
Rule2MinTrailingSilence: Single;
|
||||
Rule3MinUtteranceLength: Single;
|
||||
Rule1MinTrailingSilence: cfloat;
|
||||
Rule2MinTrailingSilence: cfloat;
|
||||
Rule3MinUtteranceLength: cfloat;
|
||||
HotwordsFile: PAnsiChar;
|
||||
HotwordsScore: Single;
|
||||
HotwordsScore: cfloat;
|
||||
CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
|
||||
RuleFsts: PAnsiChar;
|
||||
RuleFars: PAnsiChar;
|
||||
BlankPenalty: Single;
|
||||
BlankPenalty: cfloat;
|
||||
end;
|
||||
|
||||
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
|
||||
@@ -330,7 +408,7 @@ type
|
||||
end;
|
||||
{ C-ABI mirror of the offline language-model configuration.
  The field "Scale" was declared twice (once as Single, once as cfloat),
  which is a duplicate identifier; only the cfloat declaration is kept so
  the record matches the other C-ABI records in this unit. }
SherpaOnnxOfflineLMConfig = record
  Model: PAnsiChar;
  Scale: cfloat;
end;
|
||||
SherpaOnnxOfflineSenseVoiceModelConfig = record
|
||||
Model: PAnsiChar;
|
||||
@@ -361,14 +439,100 @@ type
|
||||
DecodingMethod: PAnsiChar;
|
||||
MaxActivePaths: cint32;
|
||||
HotwordsFile: PAnsiChar;
|
||||
HotwordsScore: Single;
|
||||
HotwordsScore: cfloat;
|
||||
RuleFsts: PAnsiChar;
|
||||
RuleFars: PAnsiChar;
|
||||
BlankPenalty: Single;
|
||||
BlankPenalty: cfloat;
|
||||
end;
|
||||
|
||||
PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig;
|
||||
|
||||
{ C-ABI mirror of TSherpaOnnxSileroVadModelConfig. It is passed by pointer
  to the native library, so the field order must not be changed. }
SherpaOnnxSileroVadModelConfig = record
  Model:              PAnsiChar;
  Threshold:          cfloat;
  MinSilenceDuration: cfloat;
  MinSpeechDuration:  cfloat;
  WindowSize:         cint32;
end;
|
||||
{ C-ABI mirror of TSherpaOnnxVadModelConfig. It is passed by pointer to
  SherpaOnnxCreateVoiceActivityDetector, so the field order must not be
  changed. }
SherpaOnnxVadModelConfig = record
  SileroVad:  SherpaOnnxSileroVadModelConfig;
  SampleRate: cint32;
  NumThreads: cint32;
  Provider:   PAnsiChar;
  { Boolean flag transported as an integer (the constructor stores Ord(Debug)). }
  Debug:      cint32;
end;

{ Pointer type used for the Config argument of the native constructor. }
PSherpaOnnxVadModelConfig = ^SherpaOnnxVadModelConfig;
|
||||
|
||||
{ C-ABI speech segment as returned by SherpaOnnxVoiceActivityDetectorFront.
  The field order must not be changed. }
SherpaOnnxSpeechSegment = record
  Start:   cint32;
  { Pointer to the first of N samples. }
  Samples: pcfloat;
  N:       cint32;
end;

{ Pointer type returned by the native "front" call and consumed by
  SherpaOnnxDestroySpeechSegment. }
PSherpaOnnxSpeechSegment = ^SherpaOnnxSpeechSegment;
|
||||
|
||||
{ Bindings to the native voice-activity-detector functions.
  Every function except the constructor and SherpaOnnxDestroySpeechSegment
  takes the opaque detector handle returned by
  SherpaOnnxCreateVoiceActivityDetector as its first argument. }

function SherpaOnnxCreateVoiceActivityDetector(Config: PSherpaOnnxVadModelConfig;
  BufferSizeInSeconds: cfloat): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyVoiceActivityDetector(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorAcceptWaveform(Vad: Pointer;
  Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorEmpty(Vad: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorDetected(Vad: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorPop(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorClear(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorFront(Vad: Pointer): PSherpaOnnxSpeechSegment; cdecl;
  external SherpaOnnxLibName;

{ Releases a segment obtained from SherpaOnnxVoiceActivityDetectorFront. }
procedure SherpaOnnxDestroySpeechSegment(P: PSherpaOnnxSpeechSegment); cdecl;
  external SherpaOnnxLibName;

{ Reset and Flush are called with the detector handle, not with a speech
  segment, so their argument is declared as the opaque handle just like the
  other detector functions. }
procedure SherpaOnnxVoiceActivityDetectorReset(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorFlush(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
{ Bindings to the native circular-buffer functions. "Buffer" is always the
  opaque handle returned by SherpaOnnxCreateCircularBuffer. }

function SherpaOnnxCreateCircularBuffer(Capacity: cint32): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyCircularBuffer(Buffer: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferPush(Buffer: Pointer; Samples: pcfloat;
  N: cint32); cdecl;
  external SherpaOnnxLibName;

{ The returned pointer is released with SherpaOnnxCircularBufferFree. }
function SherpaOnnxCircularBufferGet(Buffer: Pointer; StartIndex: cint32;
  N: cint32): pcfloat; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferFree(P: pcfloat); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferPop(Buffer: Pointer; N: cint32); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCircularBufferSize(Buffer: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCircularBufferHead(Buffer: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferReset(Buffer: Pointer); cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
{ Native constructor of the online recognizer; returns an opaque handle. }
function SherpaOnnxCreateOnlineRecognizer(
  Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
@@ -437,9 +601,20 @@ procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl;
|
||||
{ Bindings to the native wave helpers. They are imported under a "Wrapper"
  suffix because the Pascal-level SherpaOnnxReadWave / SherpaOnnxWriteWave
  functions of this unit use the native names. }

function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxReadWave';

function SherpaOnnxWriteWaveWrapper(Samples: pcfloat; N: cint32;
  SampleRate: cint32; Filename: PAnsiChar): cint32; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxWriteWave';

procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
|
||||
|
||||
{ Hands Samples, their count, SampleRate and Filename to the native
  SherpaOnnxWriteWave function.
  Returns True only when the native call returns 1. }
function SherpaOnnxWriteWave(Filename: AnsiString;
  Samples: array of Single; SampleRate: Integer): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxWriteWaveWrapper(pcfloat(Samples), Length(Samples),
    SampleRate, PAnsiChar(Filename));
  Result := (Status = 1);
end;
|
||||
|
||||
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
|
||||
var
|
||||
PFilename: PAnsiChar;
|
||||
@@ -611,6 +786,7 @@ begin
|
||||
C.BlankPenalty := Config.BlankPenalty;
|
||||
|
||||
Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
|
||||
Self._Config := Config;
|
||||
end;
|
||||
|
||||
destructor TSherpaOnnxOnlineRecognizer.Destroy;
|
||||
@@ -877,6 +1053,7 @@ begin
|
||||
C.BlankPenalty := Config.BlankPenalty;
|
||||
|
||||
Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
|
||||
Self._Config := Config;
|
||||
end;
|
||||
|
||||
destructor TSherpaOnnxOfflineRecognizer.Destroy;
|
||||
@@ -984,5 +1161,255 @@ begin
|
||||
[Self.Text, TokensStr, TimestampStr]);
|
||||
end;
|
||||
|
||||
{ Renders every field of the record as
  'TSherpaOnnxSileroVadModelConfig(Name := Value, ...)'.
  Floating-point fields are printed with two decimals. }
function TSherpaOnnxSileroVadModelConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxSileroVadModelConfig(' +
    'Model := %s, ' +
    'Threshold := %.2f, ' +
    'MinSilenceDuration := %.2f, ' +
    'MinSpeechDuration := %.2f, ' +
    'WindowSize := %d' +
    ')';
begin
  Result := Format(Template, [
    Self.Model,
    Self.Threshold,
    Self.MinSilenceDuration,
    Self.MinSpeechDuration,
    Self.WindowSize]);
end;
|
||||
|
||||
{ Management operator: assigns the default values to a freshly created
  record. Model is not assigned here. }
class operator TSherpaOnnxSileroVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
begin
  Dest.WindowSize := 512;
  Dest.MinSpeechDuration := 0.25;
  Dest.MinSilenceDuration := 0.5;
  Dest.Threshold := 0.5;
end;
|
||||
|
||||
{ Renders every field of the record as
  'TSherpaOnnxVadModelConfig(Name := Value, ...)'. The nested SileroVad
  record is rendered through its own ToString. }
function TSherpaOnnxVadModelConfig.ToString: AnsiString;
const
  Template =
    'TSherpaOnnxVadModelConfig(' +
    'SileroVad := %s, ' +
    'SampleRate := %d, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s' +
    ')';
begin
  Result := Format(Template, [
    Self.SileroVad.ToString,
    Self.SampleRate,
    Self.NumThreads,
    Self.Provider,
    Self.Debug.ToString]);
end;
|
||||
|
||||
{ Management operator: assigns the default values to a freshly created
  record. The nested SileroVad field is not assigned here. }
class operator TSherpaOnnxVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
begin
  Dest.Debug := False;
  Dest.Provider := 'cpu';
  Dest.NumThreads := 1;
  Dest.SampleRate := 16000;
end;
|
||||
|
||||
{ Management operator: defaults are SampleRate 16000 and FeatureDim 80. }
class operator TSherpaOnnxFeatureConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig);
begin
  Dest.FeatureDim := 80;
  Dest.SampleRate := 16000;
end;
|
||||
|
||||
{ Management operator: the only default is MaxActive = 3000; Graph is not
  assigned here. }
class operator TSherpaOnnxOnlineCtcFstDecoderConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig);
begin
  Dest.MaxActive := 3000;
end;
|
||||
|
||||
{ Management operator: assigns the default values to a freshly created
  online recognizer configuration. }
class operator TSherpaOnnxOnlineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig);
begin
  { Decoding. }
  Dest.DecodingMethod := 'greedy_search';
  Dest.HotwordsScore := 1.5;
  Dest.BlankPenalty := 0;

  { Endpoint detection is off by default; the rule values are still set. }
  Dest.EnableEndpoint := False;
  Dest.Rule1MinTrailingSilence := 2.4;
  Dest.Rule2MinTrailingSilence := 1.2;
  Dest.Rule3MinUtteranceLength := 20;
end;
|
||||
|
||||
{ Management operator: defaults are one thread, the 'cpu' provider and
  Debug switched off. }
class operator TSherpaOnnxOnlineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
begin
  Dest.Debug := False;
  Dest.Provider := 'cpu';
  Dest.NumThreads := 1;
end;
|
||||
|
||||
{ Management operator: defaults are Task 'transcribe' and TailPaddings -1. }
class operator TSherpaOnnxOfflineWhisperModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
begin
  Dest.TailPaddings := -1;
  Dest.Task := 'transcribe';
end;
|
||||
|
||||
{ Management operator: the only default is Scale = 1.0; Model is not
  assigned here. }
class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
begin
  Dest.Scale := 1.0;
end;
|
||||
|
||||
{ Management operator: the only default is UseItn = True; Model and
  Language are not assigned here. }
class operator TSherpaOnnxOfflineSenseVoiceModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig);
begin
  Dest.UseItn := True;
end;
|
||||
|
||||
{ Management operator: defaults are one thread, the 'cpu' provider and
  Debug switched off. }
class operator TSherpaOnnxOfflineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
begin
  Dest.Provider := 'cpu';
  Dest.NumThreads := 1;
  Dest.Debug := False;
end;
|
||||
|
||||
{ Management operator: assigns the default values to a freshly created
  offline recognizer configuration. }
class operator TSherpaOnnxOfflineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig);
begin
  Dest.BlankPenalty := 0;
  Dest.HotwordsScore := 1.5;
  Dest.MaxActivePaths := 4;
  Dest.DecodingMethod := 'greedy_search';
end;
|
||||
|
||||
{ Creates the native circular buffer with the requested capacity and keeps
  the returned handle. }
constructor TSherpaOnnxCircularBuffer.Create(Capacity: Integer);
var
  NativeBuffer: Pointer;
begin
  NativeBuffer := SherpaOnnxCreateCircularBuffer(Capacity);
  Self.Handle := NativeBuffer;
end;
|
||||
|
||||
{ Releases the native circular buffer and then runs the inherited
  destructor. }
destructor TSherpaOnnxCircularBuffer.Destroy;
begin
  { The handle is nil when the constructor never got as far as creating the
    native object; nothing has to be released in that case. }
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyCircularBuffer(Self.Handle);
    Self.Handle := nil;
  end;

  { An overridden destructor must chain to its ancestor. }
  inherited Destroy;
end;
|
||||
|
||||
{ Forwards all elements of Samples to the native buffer. }
procedure TSherpaOnnxCircularBuffer.Push(Samples: array of Single);
var
  Count: Integer;
begin
  Count := Length(Samples);
  SherpaOnnxCircularBufferPush(Self.Handle, pcfloat(Samples), Count);
end;
|
||||
|
||||
{ Returns a Pascal copy of N samples fetched from the native buffer,
  beginning at StartIndex.

  An empty array is returned when N is not positive or when the native call
  returns nil. The native memory is released even when allocating the
  result raises an exception. }
function TSherpaOnnxCircularBuffer.Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
var
  P: pcfloat;
  I: Integer;
begin
  Result := nil;

  { Nothing to copy; also keeps a non-positive count away from the native
    allocator and from SetLength. }
  if N <= 0 then
    Exit;

  P := SherpaOnnxCircularBufferGet(Self.Handle, StartIndex, N);
  if P = nil then
    Exit;

  try
    SetLength(Result, N);

    for I := 0 to N - 1 do
      Result[I] := P[I];
  finally
    { The native side owns P; hand it back on every path. }
    SherpaOnnxCircularBufferFree(P);
  end;
end;
|
||||
|
||||
{ Forwards N to the native pop call of the buffer. }
procedure TSherpaOnnxCircularBuffer.Pop(N: Integer);
var
  NativeBuffer: Pointer;
begin
  NativeBuffer := Self.Handle;
  SherpaOnnxCircularBufferPop(NativeBuffer, N);
end;
|
||||
|
||||
{ Invokes the native reset call on the buffer. }
procedure TSherpaOnnxCircularBuffer.Reset;
var
  NativeBuffer: Pointer;
begin
  NativeBuffer := Self.Handle;
  SherpaOnnxCircularBufferReset(NativeBuffer);
end;
|
||||
|
||||
{ Returns the value reported by the native size call. }
function TSherpaOnnxCircularBuffer.Size: Integer;
var
  NativeSize: cint32;
begin
  NativeSize := SherpaOnnxCircularBufferSize(Self.Handle);
  Result := NativeSize;
end;
|
||||
|
||||
{ Returns the value reported by the native head call. }
function TSherpaOnnxCircularBuffer.Head: Integer;
var
  NativeHead: cint32;
begin
  NativeHead := SherpaOnnxCircularBufferHead(Self.Handle);
  Result := NativeHead;
end;
|
||||
|
||||
{ Stores Config, translates it into its C-ABI mirror and creates the native
  detector from it.

  The PAnsiChar fields of the mirror point into the strings of Config, so
  they are only meant to be used for the duration of the native call. }
constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
var
  Native: SherpaOnnxVadModelConfig;
begin
  Self._Config := Config;

  Initialize(Native);

  { Top-level options. }
  Native.SampleRate := Config.SampleRate;
  Native.NumThreads := Config.NumThreads;
  Native.Provider := PAnsiChar(Config.Provider);
  Native.Debug := Ord(Config.Debug);

  { Silero VAD options. }
  Native.SileroVad.Model := PAnsiChar(Config.SileroVad.Model);
  Native.SileroVad.Threshold := Config.SileroVad.Threshold;
  Native.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration;
  Native.SileroVad.MinSpeechDuration := Config.SileroVad.MinSpeechDuration;
  Native.SileroVad.WindowSize := Config.SileroVad.WindowSize;

  Self.Handle := SherpaOnnxCreateVoiceActivityDetector(@Native, BufferSizeInSeconds);
end;
|
||||
|
||||
{ Releases the native detector and then runs the inherited destructor. }
destructor TSherpaOnnxVoiceActivityDetector.Destroy;
begin
  { The handle is nil when the constructor never got as far as creating the
    native object; nothing has to be released in that case. }
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyVoiceActivityDetector(Self.Handle);
    Self.Handle := nil;
  end;

  { An overridden destructor must chain to its ancestor. }
  inherited Destroy;
end;
|
||||
|
||||
{ Feeds every element of Samples to the native detector. }
procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(Samples: array of Single);
var
  Count: Integer;
begin
  Count := Length(Samples);
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Self.Handle, pcfloat(Samples), Count);
end;
|
||||
|
||||
{ Feeds N samples of Samples, beginning at index Offset, to the native
  detector.

  When the requested window does not lie completely inside Samples
  (negative Offset, negative N, or Offset + N beyond the end of the array)
  a diagnostic line is written and nothing is passed to the detector. }
procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(Samples: array of Single; Offset: Integer; N: Integer);
begin
  { Negative values are rejected first. After that "Length - Offset" cannot
    overflow, unlike the sum "Offset + N". }
  if (Offset < 0) or (N < 0) or (N > Length(Samples) - Offset) then
  begin
    WriteLn(Format('Invalid arguments!. Array length: %d, Offset: %d, N: %d',
      [Length(Samples), Offset, N]
    ));
    Exit;
  end;

  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Self.Handle,
    pcfloat(Samples) + Offset, N);
end;
|
||||
|
||||
{ True when the native "empty" query returns exactly 1. }
function TSherpaOnnxVoiceActivityDetector.IsEmpty: Boolean;
var
  Flag: cint32;
begin
  Flag := SherpaOnnxVoiceActivityDetectorEmpty(Self.Handle);
  Result := (Flag = 1);
end;
|
||||
|
||||
{ True when the native "detected" query returns exactly 1. }
function TSherpaOnnxVoiceActivityDetector.IsDetected: Boolean;
var
  Flag: cint32;
begin
  Flag := SherpaOnnxVoiceActivityDetectorDetected(Self.Handle);
  Result := (Flag = 1);
end;
|
||||
|
||||
{ Invokes the native pop call on the detector. }
procedure TSherpaOnnxVoiceActivityDetector.Pop;
var
  Vad: Pointer;
begin
  Vad := Self.Handle;
  SherpaOnnxVoiceActivityDetectorPop(Vad);
end;
|
||||
|
||||
{ Invokes the native clear call on the detector. }
procedure TSherpaOnnxVoiceActivityDetector.Clear;
var
  Vad: Pointer;
begin
  Vad := Self.Handle;
  SherpaOnnxVoiceActivityDetectorClear(Vad);
end;
|
||||
|
||||
{ Fetches the segment reported by the native "front" call, copies its start
  value and samples into a Pascal record and releases the native segment. }
function TSherpaOnnxVoiceActivityDetector.Front: TSherpaOnnxSpeechSegment;
var
  Segment: PSherpaOnnxSpeechSegment;
  Idx: Integer;
begin
  Segment := SherpaOnnxVoiceActivityDetectorFront(Self.Handle);

  Result.Start := Segment^.Start;

  { Start from an empty array so the result never aliases earlier data. }
  Result.Samples := nil;
  SetLength(Result.Samples, Segment^.N);

  Idx := 0;
  while Idx < Length(Result.Samples) do
  begin
    Result.Samples[Idx] := Segment^.Samples[Idx];
    Inc(Idx);
  end;

  { The copy is complete; the native segment is no longer needed. }
  SherpaOnnxDestroySpeechSegment(Segment);
end;
|
||||
|
||||
{ Invokes the native reset call on the detector. }
procedure TSherpaOnnxVoiceActivityDetector.Reset;
var
  Vad: Pointer;
begin
  Vad := Self.Handle;
  SherpaOnnxVoiceActivityDetectorReset(Vad);
end;
|
||||
|
||||
{ Invokes the native flush call on the detector. }
procedure TSherpaOnnxVoiceActivityDetector.Flush;
var
  Vad: Pointer;
begin
  Vad := Self.Handle;
  SherpaOnnxVoiceActivityDetectorFlush(Vad);
end;
|
||||
|
||||
end.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user