This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/sherpa-onnx/pascal-api/sherpa_onnx.pas
2024-08-12 23:33:35 +08:00

989 lines
29 KiB
ObjectPascal

{ Copyright (c) 2024 Xiaomi Corporation }
unit sherpa_onnx;
{$mode objfpc}
{$modeSwitch advancedRecords} { to support records with methods }
(* {$LongStrings ON} *)
interface
type
TSherpaOnnxWave = record
Samples: array of Single; { normalized to the range [-1, 1] }
SampleRate: Integer;
end;
TSherpaOnnxOnlineTransducerModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
Joiner: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineParaformerModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineZipformer2CtcModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineModelConfig = record
Transducer: TSherpaOnnxOnlineTransducerModelConfig;
Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
Tokens: AnsiString;
NumThreads: Integer;
Provider: AnsiString;
Debug: Boolean;
ModelType: AnsiString;
ModelingUnit: AnsiString;
BpeVocab: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxFeatureConfig = record
SampleRate: Integer;
FeatureDim: Integer;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineCtcFstDecoderConfig = record
Graph: AnsiString;
MaxActive: Integer;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineRecognizerConfig = record
FeatConfig: TSherpaOnnxFeatureConfig;
ModelConfig: TSherpaOnnxOnlineModelConfig;
DecodingMethod: AnsiString;
MaxActivePaths: Integer;
EnableEndpoint: Boolean;
Rule1MinTrailingSilence: Single;
Rule2MinTrailingSilence: Single;
Rule3MinUtteranceLength: Single;
HotwordsFile: AnsiString;
HotwordsScore: Single;
CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
RuleFsts: AnsiString;
RuleFars: AnsiString;
BlankPenalty: Single;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineRecognizerResult = record
Text: AnsiString;
Tokens: array of AnsiString;
Timestamps: array of Single;
function ToString: AnsiString;
end;
TSherpaOnnxOnlineStream = class
private
Handle: Pointer;
public
constructor Create(P: Pointer);
destructor Destroy; override;
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
procedure InputFinished;
end;
TSherpaOnnxOnlineRecognizer = class
private
Handle: Pointer;
public
constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
destructor Destroy; override;
function CreateStream: TSherpaOnnxOnlineStream; overload;
function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;
function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
procedure Decode(Stream: TSherpaOnnxOnlineStream);
procedure Reset(Stream: TSherpaOnnxOnlineStream);
function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
end;
TSherpaOnnxOfflineTransducerModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
Joiner: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineParaformerModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineNemoEncDecCtcModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineWhisperModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
Language: AnsiString;
Task: AnsiString;
TailPaddings: Integer;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineTdnnModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineLMConfig = record
Model: AnsiString;
Scale: Single;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineSenseVoiceModelConfig = record
Model: AnsiString;
Language: AnsiString;
UseItn: Boolean;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineModelConfig = record
Transducer: TSherpaOnnxOfflineTransducerModelConfig;
Paraformer: TSherpaOnnxOfflineParaformerModelConfig;
NeMoCtc: TSherpaOnnxOfflineNemoEncDecCtcModelConfig;
Whisper: TSherpaOnnxOfflineWhisperModelConfig;
Tdnn: TSherpaOnnxOfflineTdnnModelConfig;
Tokens: AnsiString;
NumThreads: Integer;
Debug: Boolean;
Provider: AnsiString;
ModelType: AnsiString;
ModelingUnit: AnsiString;
BpeVocab: AnsiString;
TeleSpeechCtc: AnsiString;
SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineRecognizerConfig = record
FeatConfig: TSherpaOnnxFeatureConfig;
ModelConfig: TSherpaOnnxOfflineModelConfig;
LMConfig: TSherpaOnnxOfflineLMConfig;
DecodingMethod: AnsiString;
MaxActivePaths: Integer;
HotwordsFile: AnsiString;
HotwordsScore: Single;
RuleFsts: AnsiString;
RuleFars: AnsiString;
BlankPenalty: Single;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineRecognizerResult = record
Text: AnsiString;
Tokens: array of AnsiString;
Timestamps: array of Single;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineStream = class
private
Handle: Pointer;
public
constructor Create(P: Pointer);
destructor Destroy; override;
procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
end;
TSherpaOnnxOfflineRecognizer = class
private
Handle: Pointer;
public
constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig);
destructor Destroy; override;
function CreateStream: TSherpaOnnxOfflineStream;
procedure Decode(Stream: TSherpaOnnxOfflineStream);
function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
end;
{ It supports reading a single channel wave with 16-bit encoded samples.
Samples are normalized to the range [-1, 1].
}
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
implementation
uses
ctypes,
fpjson,
{ See
- https://wiki.freepascal.org/fcl-json
- https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
}
jsonparser,
SysUtils;
const
{See https://www.freepascal.org/docs-html/prog/progap7.html}
{$IFDEF WINDOWS}
SherpaOnnxLibName = 'sherpa-onnx-c-api.dll';
{$ENDIF}
{$IFDEF DARWIN}
SherpaOnnxLibName = 'sherpa-onnx-c-api';
{$linklib sherpa-onnx-c-api}
{$ENDIF}
{$IFDEF LINUX}
SherpaOnnxLibName = 'libsherpa-onnx-c-api.so';
{$ENDIF}
type
SherpaOnnxWave = record
Samples: pcfloat;
SampleRate: cint32;
NumSamples: cint32;
end;
PSherpaOnnxWave = ^SherpaOnnxWave;
SherpaOnnxOnlineTransducerModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
Joiner: PAnsiChar;
end;
SherpaOnnxOnlineParaformerModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
end;
SherpaOnnxOnlineZipformer2CtcModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOnlineModelConfig= record
Transducer: SherpaOnnxOnlineTransducerModelConfig;
Paraformer: SherpaOnnxOnlineParaformerModelConfig;
Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
Tokens: PAnsiChar;
NumThreads: cint32;
Provider: PAnsiChar;
Debug: cint32;
ModelType: PAnsiChar;
ModelingUnit: PAnsiChar;
BpeVocab: PAnsiChar;
end;
SherpaOnnxFeatureConfig = record
SampleRate: cint32;
FeatureDim: cint32;
end;
SherpaOnnxOnlineCtcFstDecoderConfig = record
Graph: PAnsiChar;
MaxActive: cint32;
end;
SherpaOnnxOnlineRecognizerConfig = record
FeatConfig: SherpaOnnxFeatureConfig;
ModelConfig: SherpaOnnxOnlineModelConfig;
DecodingMethod: PAnsiChar;
MaxActivePaths: cint32;
EnableEndpoint: cint32;
Rule1MinTrailingSilence: Single;
Rule2MinTrailingSilence: Single;
Rule3MinUtteranceLength: Single;
HotwordsFile: PAnsiChar;
HotwordsScore: Single;
CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
RuleFsts: PAnsiChar;
RuleFars: PAnsiChar;
BlankPenalty: Single;
end;
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
SherpaOnnxOfflineTransducerModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
Joiner: PAnsiChar;
end;
SherpaOnnxOfflineParaformerModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOfflineWhisperModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
Language: PAnsiChar;
Task: PAnsiChar;
TailPaddings: cint32;
end;
SherpaOnnxOfflineTdnnModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOfflineLMConfig = record
Model: PAnsiChar;
Scale: Single;
end;
SherpaOnnxOfflineSenseVoiceModelConfig = record
Model: PAnsiChar;
Language: PAnsiChar;
UseItn: cint32;
end;
SherpaOnnxOfflineModelConfig = record
Transducer: SherpaOnnxOfflineTransducerModelConfig;
Paraformer: SherpaOnnxOfflineParaformerModelConfig;
NeMoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig;
Whisper: SherpaOnnxOfflineWhisperModelConfig;
Tdnn: SherpaOnnxOfflineTdnnModelConfig;
Tokens: PAnsiChar;
NumThreads: cint32;
Debug: cint32;
Provider: PAnsiChar;
ModelType: PAnsiChar;
ModelingUnit: PAnsiChar;
BpeVocab: PAnsiChar;
TeleSpeechCtc: PAnsiChar;
SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
end;
SherpaOnnxOfflineRecognizerConfig = record
FeatConfig: SherpaOnnxFeatureConfig;
ModelConfig: SherpaOnnxOfflineModelConfig;
LMConfig: SherpaOnnxOfflineLMConfig;
DecodingMethod: PAnsiChar;
MaxActivePaths: cint32;
HotwordsFile: PAnsiChar;
HotwordsScore: Single;
RuleFsts: PAnsiChar;
RuleFars: PAnsiChar;
BlankPenalty: Single;
end;
PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig;
function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
external SherpaOnnxLibName;
function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOnlineStream(Recognizer: Pointer); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
SampleRate: cint32; Samples: pcfloat; N: cint32 ); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
external SherpaOnnxLibName;
function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxCreateOfflineRecognizer(Config: PSherpaOnnxOfflineRecognizerConfig): Pointer; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOfflineRecognizer(Recognizer: Pointer); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxCreateOfflineStream(Recognizer: Pointer): Pointer; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOfflineStream(Stream: Pointer); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
external SherpaOnnxLibName;
procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl;
external SherpaOnnxLibName;
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
external SherpaOnnxLibName name 'SherpaOnnxReadWave';
procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
PFilename: PAnsiChar;
PWave: PSherpaOnnxWave;
I: Integer;
begin
PFilename := PAnsiChar(Filename);
PWave := SherpaOnnxReadWaveWrapper(PFilename);
Result.Samples := nil;
SetLength(Result.Samples, PWave^.NumSamples);
Result.SampleRate := PWave^.SampleRate;
for I := Low(Result.Samples) to High(Result.Samples) do
Result.Samples[I] := PWave^.Samples[I];
SherpaOnnxFreeWaveWrapper(PWave);
end;
function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)',
[Self.Encoder, Self.Decoder, Self.Joiner]);
end;
function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)',
[Self.Encoder, Self.Decoder]);
end;
function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)',
[Self.Model]);
end;
function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
'Paraformer := %s,' +
'Zipformer2Ctc := %s, ' +
'Tokens := %s, ' +
'NumThreads := %d, ' +
'Provider := %s, ' +
'Debug := %s, ' +
'ModelType := %s, ' +
'ModelingUnit := %s, ' +
'BpeVocab := %s)'
,
[Self.Transducer.ToString, Self.Paraformer.ToString,
Self.Zipformer2Ctc.ToString, Self.Tokens,
Self.NumThreads, Self.Provider, Self.Debug.ToString,
Self.ModelType, Self.ModelingUnit, Self.BpeVocab
]);
end;
function TSherpaOnnxFeatureConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)',
[Self.SampleRate, Self.FeatureDim]);
end;
function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)',
[Self.Graph, Self.MaxActive]);
end;
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
'ModelConfig := %s, ' +
'DecodingMethod := %s, ' +
'MaxActivePaths := %d, ' +
'EnableEndpoint := %s, ' +
'Rule1MinTrailingSilence := %.1f, ' +
'Rule2MinTrailingSilence := %.1f, ' +
'Rule3MinUtteranceLength := %.1f, ' +
'HotwordsFile := %s, ' +
'HotwordsScore := %.1f, ' +
'CtcFstDecoderConfig := %s, ' +
'RuleFsts := %s, ' +
'RuleFars := %s, ' +
'BlankPenalty := %.1f' +
')'
,
[Self.FeatConfig.ToString, Self.ModelConfig.ToString,
Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
Self.BlankPenalty
]);
end;
function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
TokensStr: AnsiString;
S: AnsiString;
TimestampStr: AnsiString;
T: Single;
Sep: AnsiString;
begin
TokensStr := '[';
Sep := '';
for S in Self.Tokens do
begin
TokensStr := TokensStr + Sep + S;
Sep := ', ';
end;
TokensStr := TokensStr + ']';
TimestampStr := '[';
Sep := '';
for T in Self.Timestamps do
begin
TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
Sep := ', ';
end;
TimestampStr := TimestampStr + ']';
Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
'Tokens := %s, ' +
'Timestamps := %s' +
')',
[Self.Text, TokensStr, TimestampStr]);
end;
constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
C: SherpaOnnxOnlineRecognizerConfig;
begin
Initialize(C);
C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;
C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);
C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
C.MaxActivePaths := Config.MaxActivePaths;
C.EnableEndpoint := Ord(Config.EnableEndpoint);
C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
C.HotwordsScore := Config.HotwordsScore;
C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
C.RuleFsts := PAnsiChar(Config.RuleFsts);
C.RuleFars := PAnsiChar(Config.RuleFars);
C.BlankPenalty := Config.BlankPenalty;
Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
end;
destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
Self.Handle := nil;
end;
function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
var
Stream: Pointer;
begin
Stream := SherpaOnnxCreateOnlineStream(Self.Handle);
Result := TSherpaOnnxOnlineStream.Create(Stream);
end;
function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
var
Stream: Pointer;
begin
Stream := SherpaOnnxCreateOnlineStreamWithHotwords(Self.Handle, PAnsiChar(Hotwords));
Result := TSherpaOnnxOnlineStream.Create(Stream);
end;
function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
begin
Result := SherpaOnnxIsOnlineStreamReady(Self.Handle, Stream.Handle) = 1;
end;
procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
begin
SherpaOnnxDecodeOnlineStream(Self.Handle, Stream.Handle);
end;
procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
begin
SherpaOnnxOnlineStreamReset(Self.Handle, Stream.Handle);
end;
function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
begin
Result := SherpaOnnxOnlineStreamIsEndpoint(Self.Handle, Stream.Handle) = 1;
end;
function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
pJson: PAnsiChar;
JsonData: TJSONData;
JsonObject : TJSONObject;
JsonEnum: TJSONEnum;
I: Integer;
begin
pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);
{
- https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
- https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
- https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
- https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html
}
JsonData := GetJSON(AnsiString(pJson), False);
JsonObject := JsonData as TJSONObject;
Result.Text := JsonObject.Strings['text'];
SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
I := 0;
for JsonEnum in JsonObject.Arrays['tokens'] do
begin
Result.Tokens[I] := JsonEnum.Value.AsString;
Inc(I);
end;
SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
I := 0;
for JsonEnum in JsonObject.Arrays['timestamps'] do
begin
Result.Timestamps[I] := JsonEnum.Value.AsFloat;
Inc(I);
end;
SherpaOnnxDestroyOnlineStreamResultJson(pJson);
end;
constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
Self.Handle := P;
end;
destructor TSherpaOnnxOnlineStream.Destroy;
begin
SherpaOnnxDestroyOnlineStream(Self.Handle);
Self.Handle := nil;
end;
procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
begin
SherpaOnnxOnlineStreamAcceptWaveform(Self.Handle, SampleRate,
pcfloat(Samples), Length(Samples));
end;
procedure TSherpaOnnxOnlineStream.InputFinished;
begin
SherpaOnnxOnlineStreamInputFinished(Self.Handle);
end;
function TSherpaOnnxOfflineTransducerModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineTransducerModelConfig(' +
'Encoder := %s, ' +
'Decoder := %s, ' +
'Joiner := %s' +
')',
[Self.Encoder, Self.Decoder, Self.Joiner]);
end;
function TSherpaOnnxOfflineParaformerModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineParaformerModelConfig(Model := %s)',
[Self.Model]);
end;
function TSherpaOnnxOfflineNemoEncDecCtcModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineNemoEncDecCtcModelConfig(Model := %s)',
[Self.Model]);
end;
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
'Encoder := %s, ' +
'Decoder := %s, ' +
'Language := %s, ' +
'Task := %s, ' +
'TailPaddings := %d' +
')',
[Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
end;
function TSherpaOnnxOfflineTdnnModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineTdnnModelConfig(Model := %s)',
[Self.Model]);
end;
function TSherpaOnnxOfflineLMConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineLMConfig(' +
'Model := %s, ' +
'Scale := %.1f' +
')',
[Self.Model, Self.Scale]);
end;
function TSherpaOnnxOfflineSenseVoiceModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineSenseVoiceModelConfig(' +
'Model := %s, ' +
'Language := %s, ' +
'UseItn := %s' +
')',
[Self.Model, Self.Language, Self.UseItn.ToString]);
end;
function TSherpaOnnxOfflineModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineModelConfig(' +
'Transducer := %s, ' +
'Paraformer := %s, ' +
'NeMoCtc := %s, ' +
'Whisper := %s, ' +
'Tdnn := %s, ' +
'Tokens := %s, ' +
'NumThreads := %d, ' +
'Debug := %s, ' +
'Provider := %s, ' +
'ModelType := %s, ' +
'ModelingUnit := %s, ' +
'BpeVocab := %s, ' +
'TeleSpeechCtc := %s, ' +
'SenseVoice := %s' +
')',
[Self.Transducer.ToString, Self.Paraformer.ToString,
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
Self.TeleSpeechCtc, Self.SenseVoice.ToString
]);
end;
function TSherpaOnnxOfflineRecognizerConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineRecognizerConfig(' +
'FeatConfig := %s, ' +
'ModelConfig := %s, ' +
'LMConfig := %s, ' +
'DecodingMethod := %s, ' +
'MaxActivePaths := %d, ' +
'HotwordsFile := %s, ' +
'HotwordsScore := %.1f, ' +
'RuleFsts := %s, ' +
'RuleFars := %s, ' +
'BlankPenalty := %1.f' +
')',
[Self.FeatConfig.ToString, Self.ModelConfig.ToString,
Self.LMConfig.ToString, Self.DecodingMethod, Self.MaxActivePaths,
Self.HotwordsFile, Self.HotwordsScore, Self.RuleFsts, Self.RuleFars,
Self.BlankPenalty
]);
end;
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
C: SherpaOnnxOfflineRecognizerConfig;
begin
Initialize(C);
C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;
C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);
C.ModelConfig.Paraformer.Model := PAnsiChar(Config.ModelConfig.Paraformer.Model);
C.ModelConfig.NeMoCtc.Model := PAnsiChar(Config.ModelConfig.NeMoCtc.Model);
C.ModelConfig.Whisper.Encoder := PAnsiChar(Config.ModelConfig.Whisper.Encoder);
C.ModelConfig.Whisper.Decoder := PAnsiChar(Config.ModelConfig.Whisper.Decoder);
C.ModelConfig.Whisper.Language := PAnsiChar(Config.ModelConfig.Whisper.Language);
C.ModelConfig.Whisper.Task := PAnsiChar(Config.ModelConfig.Whisper.Task);
C.ModelConfig.Whisper.TailPaddings := Config.ModelConfig.Whisper.TailPaddings;
C.ModelConfig.Tdnn.Model := PAnsiChar(Config.ModelConfig.Tdnn.Model);
C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
C.ModelConfig.TeleSpeechCtc := PAnsiChar(Config.ModelConfig.TeleSpeechCtc);
C.ModelConfig.SenseVoice.Model := PAnsiChar(Config.ModelConfig.SenseVoice.Model);
C.ModelConfig.SenseVoice.Language := PAnsiChar(Config.ModelConfig.SenseVoice.Language);
C.ModelConfig.SenseVoice.UseItn := Ord(Config.ModelConfig.SenseVoice.UseItn);
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
C.LMConfig.Scale := Config.LMConfig.Scale;
C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
C.MaxActivePaths := Config.MaxActivePaths;
C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
C.HotwordsScore := Config.HotwordsScore;
C.RuleFsts := PAnsiChar(Config.RuleFsts);
C.RuleFars := PAnsiChar(Config.RuleFars);
C.BlankPenalty := Config.BlankPenalty;
Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
end;
destructor TSherpaOnnxOfflineRecognizer.Destroy;
begin
SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
Self.Handle := nil;
end;
function TSherpaOnnxOfflineRecognizer.CreateStream: TSherpaOnnxOfflineStream;
var
Stream: Pointer;
begin
Stream := SherpaOnnxCreateOfflineStream(Self.Handle);
Result := TSherpaOnnxOfflineStream.Create(Stream);
end;
procedure TSherpaOnnxOfflineRecognizer.Decode(Stream: TSherpaOnnxOfflineStream);
begin
SherpaOnnxDecodeOfflineStream(Self.Handle, Stream.Handle);
end;
function TSherpaOnnxOfflineRecognizer.GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
var
pJson: PAnsiChar;
JsonData: TJSONData;
JsonObject : TJSONObject;
JsonEnum: TJSONEnum;
I: Integer;
begin
pJson := SherpaOnnxGetOfflineStreamResultAsJson(Stream.Handle);
JsonData := GetJSON(AnsiString(pJson), False);
JsonObject := JsonData as TJSONObject;
Result.Text := JsonObject.Strings['text'];
SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
I := 0;
for JsonEnum in JsonObject.Arrays['tokens'] do
begin
Result.Tokens[I] := JsonEnum.Value.AsString;
Inc(I);
end;
SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
I := 0;
for JsonEnum in JsonObject.Arrays['timestamps'] do
begin
Result.Timestamps[I] := JsonEnum.Value.AsFloat;
Inc(I);
end;
SherpaOnnxDestroyOfflineStreamResultJson(pJson);
end;
constructor TSherpaOnnxOfflineStream.Create(P: Pointer);
begin
Self.Handle := P;
end;
destructor TSherpaOnnxOfflineStream.Destroy;
begin
SherpaOnnxDestroyOfflineStream(Self.Handle);
Self.Handle := nil;
end;
procedure TSherpaOnnxOfflineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
begin
SherpaOnnxAcceptWaveformOffline(Self.Handle, SampleRate, pcfloat(Samples),
Length(Samples));
end;
function TSherpaOnnxOfflineRecognizerResult.ToString: AnsiString;
var
TokensStr: AnsiString;
S: AnsiString;
TimestampStr: AnsiString;
T: Single;
Sep: AnsiString;
begin
TokensStr := '[';
Sep := '';
for S in Self.Tokens do
begin
TokensStr := TokensStr + Sep + S;
Sep := ', ';
end;
TokensStr := TokensStr + ']';
TimestampStr := '[';
Sep := '';
for T in Self.Timestamps do
begin
TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
Sep := ', ';
end;
TimestampStr := TimestampStr + ']';
Result := Format('TSherpaOnnxOfflineRecognizerResult(Text := %s, ' +
'Tokens := %s, ' +
'Timestamps := %s' +
')',
[Self.Text, TokensStr, TimestampStr]);
end;
end.