Pascal API for streaming ASR (#1246)
This commit is contained in:
@@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer(
|
||||
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||
recognizer_config.model_config.provider_config.provider =
|
||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||
|
||||
if (recognizer_config.model_config.provider_config.provider.empty()) {
|
||||
recognizer_config.model_config.provider_config.provider = "cpu";
|
||||
}
|
||||
|
||||
recognizer_config.model_config.model_type =
|
||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||
recognizer_config.model_config.debug =
|
||||
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||
recognizer_config.model_config.modeling_unit =
|
||||
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
||||
|
||||
if (recognizer_config.model_config.modeling_unit.empty()) {
|
||||
recognizer_config.model_config.modeling_unit = "cjkchar";
|
||||
}
|
||||
|
||||
recognizer_config.model_config.bpe_vocab =
|
||||
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
||||
|
||||
recognizer_config.decoding_method =
|
||||
SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
|
||||
if (recognizer_config.decoding_method.empty()) {
|
||||
recognizer_config.decoding_method = "greedy_search";
|
||||
}
|
||||
|
||||
recognizer_config.max_active_paths =
|
||||
SHERPA_ONNX_OR(config->max_active_paths, 4);
|
||||
|
||||
@@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig(
|
||||
SHERPA_ONNX_OR(config->model_config.debug, 0);
|
||||
recognizer_config.model_config.provider =
|
||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||
if (recognizer_config.model_config.provider.empty()) {
|
||||
recognizer_config.model_config.provider = "cpu";
|
||||
}
|
||||
|
||||
recognizer_config.model_config.model_type =
|
||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||
recognizer_config.model_config.modeling_unit =
|
||||
SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
|
||||
|
||||
if (recognizer_config.model_config.modeling_unit.empty()) {
|
||||
recognizer_config.model_config.modeling_unit = "cjkchar";
|
||||
}
|
||||
|
||||
recognizer_config.model_config.bpe_vocab =
|
||||
SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");
|
||||
|
||||
@@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
|
||||
SHERPA_ONNX_OR(config->model_config.num_threads, 1);
|
||||
spotter_config.model_config.provider_config.provider =
|
||||
SHERPA_ONNX_OR(config->model_config.provider, "cpu");
|
||||
if (spotter_config.model_config.provider_config.provider.empty()) {
|
||||
spotter_config.model_config.provider_config.provider = "cpu";
|
||||
}
|
||||
|
||||
spotter_config.model_config.model_type =
|
||||
SHERPA_ONNX_OR(config->model_config.model_type, "");
|
||||
spotter_config.model_config.debug =
|
||||
@@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
|
||||
vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
|
||||
vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||
vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||
if (vad_config.provider.empty()) {
|
||||
vad_config.provider = "cpu";
|
||||
}
|
||||
|
||||
vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
|
||||
|
||||
if (vad_config.debug) {
|
||||
@@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
||||
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||
tts_config.model.debug = config->model.debug;
|
||||
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||
if (tts_config.model.provider.empty()) {
|
||||
tts_config.model.provider = "cpu";
|
||||
}
|
||||
|
||||
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
|
||||
tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
|
||||
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
|
||||
@@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification(
|
||||
slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||
slid_config.debug = config->debug;
|
||||
slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||
if (slid_config.provider.empty()) {
|
||||
slid_config.provider = "cpu";
|
||||
}
|
||||
|
||||
if (slid_config.debug) {
|
||||
SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
|
||||
@@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
|
||||
c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||
c.debug = SHERPA_ONNX_OR(config->debug, 0);
|
||||
c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||
if (c.provider.empty()) {
|
||||
c.provider = "cpu";
|
||||
}
|
||||
|
||||
if (config->debug) {
|
||||
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
||||
@@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
|
||||
ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||
ac.model.debug = config->model.debug;
|
||||
ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||
if (ac.model.provider.empty()) {
|
||||
ac.model.provider = "cpu";
|
||||
}
|
||||
|
||||
ac.labels = SHERPA_ONNX_OR(config->labels, "");
|
||||
ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);
|
||||
|
||||
@@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation(
|
||||
c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||
c.model.debug = config->model.debug;
|
||||
c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||
if (c.model.provider.empty()) {
|
||||
c.model.provider = "cpu";
|
||||
}
|
||||
|
||||
if (c.model.debug) {
|
||||
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
|
||||
|
||||
@@ -4,6 +4,9 @@ unit sherpa_onnx;
|
||||
|
||||
{$mode objfpc}
|
||||
|
||||
{$modeSwitch advancedRecords} { to support records with methods }
|
||||
(* {$LongStrings ON} *)
|
||||
|
||||
interface
|
||||
|
||||
type
|
||||
@@ -12,15 +15,117 @@ type
|
||||
SampleRate: Integer;
|
||||
end;
|
||||
|
||||
{ Pascal-side settings for a streaming transducer model.
  TSherpaOnnxOnlineRecognizer.Create casts every field to PAnsiChar and
  stores it in the C record handed to the native library.
  NOTE(review): the fields presumably hold model file paths - confirm. }
TSherpaOnnxOnlineTransducerModelConfig = record
  Encoder: AnsiString;
  Decoder: AnsiString;
  Joiner: AnsiString;
  { One-line, human-readable dump of all fields. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Pascal-side settings for a streaming paraformer model.
  TSherpaOnnxOnlineRecognizer.Create casts both fields to PAnsiChar and
  stores them in the C record handed to the native library.
  NOTE(review): the fields presumably hold model file paths - confirm. }
TSherpaOnnxOnlineParaformerModelConfig = record
  Encoder: AnsiString;
  Decoder: AnsiString;
  { One-line, human-readable dump of all fields. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Pascal-side settings for a streaming Zipformer2 CTC model.
  TSherpaOnnxOnlineRecognizer.Create casts Model to PAnsiChar and stores it
  in the C record handed to the native library.
  NOTE(review): Model presumably holds a model file path - confirm. }
TSherpaOnnxOnlineZipformer2CtcModelConfig = record
  Model: AnsiString;
  { One-line, human-readable dump of the record. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Aggregated model settings of the streaming recognizer.
  TSherpaOnnxOnlineRecognizer.Create copies the record field by field into
  its C counterpart: strings are cast to PAnsiChar, Debug is converted with
  Ord and NumThreads is copied unchanged. }
TSherpaOnnxOnlineModelConfig = record
  Transducer: TSherpaOnnxOnlineTransducerModelConfig;
  Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
  Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
  Tokens: AnsiString;
  NumThreads: Integer;
  Provider: AnsiString;
  Debug: Boolean;
  ModelType: AnsiString;
  ModelingUnit: AnsiString;
  BpeVocab: AnsiString;
  { One-line, human-readable dump including the nested model records. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Feature extraction settings.
  Both integers are copied unchanged into the C feature config by
  TSherpaOnnxOnlineRecognizer.Create. }
TSherpaOnnxFeatureConfig = record
  SampleRate: Integer;
  FeatureDim: Integer;
  { One-line, human-readable dump of both fields. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Settings of the CTC FST decoder.
  TSherpaOnnxOnlineRecognizer.Create casts Graph to PAnsiChar and copies
  MaxActive unchanged into the C record.
  NOTE(review): Graph presumably holds a file path - confirm. }
TSherpaOnnxOnlineCtcFstDecoderConfig = record
  Graph: AnsiString;
  MaxActive: Integer;
  { One-line, human-readable dump of both fields. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Complete configuration accepted by TSherpaOnnxOnlineRecognizer.Create.
  The constructor translates it field by field into the C record: strings
  become PAnsiChar, EnableEndpoint is converted with Ord, and the numeric
  fields are copied unchanged. }
TSherpaOnnxOnlineRecognizerConfig = record
  FeatConfig: TSherpaOnnxFeatureConfig;
  ModelConfig: TSherpaOnnxOnlineModelConfig;
  DecodingMethod: AnsiString;
  MaxActivePaths: Integer;
  EnableEndpoint: Boolean;
  Rule1MinTrailingSilence: Single;
  Rule2MinTrailingSilence: Single;
  Rule3MinUtteranceLength: Single;
  HotwordsFile: AnsiString;
  HotwordsScore: Single;
  CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
  RuleFsts: AnsiString;
  RuleFars: AnsiString;
  BlankPenalty: Single;
  { One-line, human-readable dump including the nested records. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Recognition result returned by TSherpaOnnxOnlineRecognizer.GetResult.
  GetResult fills Text, Tokens and Timestamps from the 'text', 'tokens' and
  'timestamps' members of the JSON produced by the native library. }
TSherpaOnnxOnlineRecognizerResult = record
  Text: AnsiString;
  Tokens: array of AnsiString;
  Timestamps: array of Single;
  { One-line dump; timestamps are printed with two decimals. }
  function ToString: AnsiString;
end;
|
||||
|
||||
{ Object wrapper around a native online-stream handle.
  Instances are created by TSherpaOnnxOnlineRecognizer.CreateStream; the
  destructor passes the handle to SherpaOnnxDestroyOnlineStream. }
TSherpaOnnxOnlineStream = class
private
  { Opaque pointer obtained from the native library. }
  Handle: Pointer;
public
  { Stores P as the wrapped handle. }
  constructor Create(P: Pointer);
  destructor Destroy; override;
  { Forwards Samples and SampleRate to the native stream. }
  procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  { Forwards to SherpaOnnxOnlineStreamInputFinished. }
  procedure InputFinished;
end;
|
||||
|
||||
{ Object wrapper around a native online (streaming) recognizer handle.
  Every method forwards to the matching SherpaOnnx* function imported from
  the native library, passing the recognizer handle and, where applicable,
  the handle of the given stream. }
TSherpaOnnxOnlineRecognizer = class
private
  { Opaque pointer returned by SherpaOnnxCreateOnlineRecognizer. }
  Handle: Pointer;
public
  constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
  destructor Destroy; override;

  { Both overloads return a new TSherpaOnnxOnlineStream object. }
  function CreateStream: TSherpaOnnxOnlineStream; overload;
  function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;

  { True when the native call returns 1. }
  function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
  procedure Decode(Stream: TSherpaOnnxOnlineStream);
  procedure Reset(Stream: TSherpaOnnxOnlineStream);
  { True when the native call returns 1. }
  function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
  { Parses the JSON result of the native library into a Pascal record. }
  function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
end;
|
||||
|
||||
{ It supports reading a single channel wave with 16-bit encoded samples.
|
||||
Samples are normalized to the range [-1, 1].
|
||||
}
|
||||
function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave;
|
||||
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
ctypes;
|
||||
ctypes,
|
||||
fpjson,
|
||||
{ See
|
||||
- https://wiki.freepascal.org/fcl-json
|
||||
- https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
|
||||
}
|
||||
jsonparser,
|
||||
SysUtils;
|
||||
|
||||
const
|
||||
{See https://www.freepascal.org/docs-html/prog/progap7.html}
|
||||
@@ -47,31 +152,383 @@ type
|
||||
|
||||
PSherpaOnnxWave = ^SherpaOnnxWave;
|
||||
|
||||
{ C-side counterpart of TSherpaOnnxOnlineTransducerModelConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create.
  NOTE(review): field order and types presumably have to match the native
  struct of the same name - verify against the C header. }
SherpaOnnxOnlineTransducerModelConfig = record
  Encoder: PAnsiChar;
  Decoder: PAnsiChar;
  Joiner: PAnsiChar;
end;
|
||||
{ C-side counterpart of TSherpaOnnxOnlineParaformerModelConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxOnlineParaformerModelConfig = record
  Encoder: PAnsiChar;
  Decoder: PAnsiChar;
end;
|
||||
{ C-side counterpart of TSherpaOnnxOnlineZipformer2CtcModelConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxOnlineZipformer2CtcModelConfig = record
  Model: PAnsiChar;
end;
|
||||
|
||||
{ C-side counterpart of TSherpaOnnxOnlineModelConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create. Debug carries Ord of the Pascal Boolean.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxOnlineModelConfig = record
  Transducer: SherpaOnnxOnlineTransducerModelConfig;
  Paraformer: SherpaOnnxOnlineParaformerModelConfig;
  Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
  Tokens: PAnsiChar;
  NumThreads: cint32;
  Provider: PAnsiChar;
  Debug: cint32;
  ModelType: PAnsiChar;
  ModelingUnit: PAnsiChar;
  BpeVocab: PAnsiChar;
end;
|
||||
{ C-side counterpart of TSherpaOnnxFeatureConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxFeatureConfig = record
  SampleRate: cint32;
  FeatureDim: cint32;
end;
|
||||
{ C-side counterpart of TSherpaOnnxOnlineCtcFstDecoderConfig; filled in
  TSherpaOnnxOnlineRecognizer.Create.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxOnlineCtcFstDecoderConfig = record
  Graph: PAnsiChar;
  MaxActive: cint32;
end;
|
||||
{ Record whose address is passed to SherpaOnnxCreateOnlineRecognizer.
  TSherpaOnnxOnlineRecognizer.Create fills it from the Pascal-side
  TSherpaOnnxOnlineRecognizerConfig; EnableEndpoint carries Ord of the
  Pascal Boolean.
  NOTE(review): layout presumably has to match the native struct - verify. }
SherpaOnnxOnlineRecognizerConfig = record
  FeatConfig: SherpaOnnxFeatureConfig;
  ModelConfig: SherpaOnnxOnlineModelConfig;
  DecodingMethod: PAnsiChar;
  MaxActivePaths: cint32;
  EnableEndpoint: cint32;
  Rule1MinTrailingSilence: Single;
  Rule2MinTrailingSilence: Single;
  Rule3MinUtteranceLength: Single;
  HotwordsFile: PAnsiChar;
  HotwordsScore: Single;
  CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
  RuleFsts: PAnsiChar;
  RuleFars: PAnsiChar;
  BlankPenalty: Single;
end;
|
||||
|
||||
PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;
|
||||
|
||||
{ Imports from SherpaOnnxLibName used to create and destroy recognizers and
  to create streams. The returned pointers are kept as opaque handles by
  TSherpaOnnxOnlineRecognizer and TSherpaOnnxOnlineStream. }

{ Called by TSherpaOnnxOnlineRecognizer.Create with the address of a filled
  C config record. }
function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

{ Called by TSherpaOnnxOnlineRecognizer.Destroy. }
procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Called by the parameterless CreateStream overload. }
function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

{ Called by the CreateStream overload that takes a hotwords string. }
function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
{ Imported from SherpaOnnxLibName. TSherpaOnnxOnlineStream.Destroy calls it
  with the stream handle, so the parameter is named Stream. The rename does
  not affect callers because Pascal arguments are positional. }
procedure SherpaOnnxDestroyOnlineStream(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
{ Imports from SherpaOnnxLibName that operate on an existing stream. The
  Recognizer and Stream arguments are the opaque handles held by the Pascal
  wrapper classes. }

{ Samples points to N floats; called by TSherpaOnnxOnlineStream.AcceptWaveform. }
procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ The wrapper treats a return value of 1 as True. }
function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ The wrapper treats a return value of 1 as True. }
function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

{ Returns a JSON string; GetResult hands the pointer back to
  SherpaOnnxDestroyOnlineStreamResultJson once it has been parsed. }
function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  external SherpaOnnxLibName;
|
||||
|
||||
{ Native wave reader, imported under a different Pascal name because the
  unit exports its own SherpaOnnxReadWave. }
function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxReadWave';

{ Releases a wave returned by SherpaOnnxReadWaveWrapper. }
procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxFreeWave';
|
||||
|
||||
{ Reads a wave file through the native reader and copies its content into a
  Pascal-managed record.

  Filename: passed to the native reader as a PAnsiChar.
  Returns:  the samples and the sample rate. When the native reader returns
            nil the result has no samples and SampleRate = 0. }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
  I: Integer;
begin
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));
  if PWave = nil then
    Exit; { dereferencing a nil wave would raise an access violation }

  try
    SetLength(Result.Samples, PWave^.NumSamples);
    Result.SampleRate := PWave^.SampleRate;

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := PWave^.Samples[I];
  finally
    { The native buffer is released even if the copy raises. }
    SherpaOnnxFreeWaveWrapper(PWave);
  end;
end;
|
||||
|
||||
{ Renders the three fields as a single line of text. }
function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)';
begin
  Result := Format(Layout, [Encoder, Decoder, Joiner]);
end;
|
||||
|
||||
{ Renders both fields as a single line of text. }
function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)';
begin
  Result := Format(Layout, [Encoder, Decoder]);
end;
|
||||
|
||||
{ Renders the Model field as a single line of text. }
function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)';
begin
  Result := Format(Layout, [Model]);
end;
|
||||
|
||||
{ Renders the model configuration, including the nested model records, as a
  single line of text. Every field is separated by ', '. }
function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
    'Paraformer := %s, ' +   { the separator space was missing here }
    'Zipformer2Ctc := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s)',
    [Self.Transducer.ToString, Self.Paraformer.ToString,
     Self.Zipformer2Ctc.ToString, Self.Tokens,
     Self.NumThreads, Self.Provider, Self.Debug.ToString,
     Self.ModelType, Self.ModelingUnit, Self.BpeVocab]);
end;
|
||||
|
||||
{ Renders both integers as a single line of text. }
function TSherpaOnnxFeatureConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)';
begin
  Result := Format(Layout, [SampleRate, FeatureDim]);
end;
|
||||
|
||||
{ Renders Graph and MaxActive as a single line of text. }
function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
const
  Layout = 'TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)';
begin
  Result := Format(Layout, [Graph, MaxActive]);
end;
|
||||
|
||||
{ Renders the complete recognizer configuration, including the nested
  records, as a single line of text. Floating point fields are printed with
  one decimal. }
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
  { The first label used to read 'FeatConfg'; it now matches the field name. }
  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'EnableEndpoint := %s, ' +
    'Rule1MinTrailingSilence := %.1f, ' +
    'Rule2MinTrailingSilence := %.1f, ' +
    'Rule3MinUtteranceLength := %.1f, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'CtcFstDecoderConfig := %s, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f' +
    ')',
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
     Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
     Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty]);
end;
|
||||
|
||||
{ Renders the result as a single line of text. Tokens and timestamps are
  printed as bracketed, comma separated lists; timestamps use two decimals. }
function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  S: AnsiString;
  TimestampStr: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  { Sep stays empty for the first element so the list has no leading comma. }
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  { The last field no longer carries a dangling ', ' before the ')'. }
  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;
|
||||
|
||||
{ Creates the native recognizer from a Pascal-side configuration.

  Config is translated field by field into the C record: strings are passed
  as PAnsiChar, Booleans as Ord and numbers unchanged. The PAnsiChar values
  point into Config's strings, so they are only valid during this call.
  NOTE(review): the native side presumably copies the strings - confirm.
  The returned handle is stored in Handle; it is not checked for nil here. }
constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
  C: SherpaOnnxOnlineRecognizerConfig;
begin
  inherited Create;

  { FillChar zeroes the whole record whether or not it has managed fields,
    so nothing is handed to the native library uninitialized. }
  FillChar(C, SizeOf(C), 0);

  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);

  C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);

  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.EnableEndpoint := Ord(Config.EnableEndpoint);
  C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;

  Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
end;
|
||||
|
||||
{ Releases the native recognizer, if one is held, and then runs the
  inherited destructor. }
destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
  { Handle can still be nil, e.g. when the constructor did not complete. }
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
|
||||
|
||||
{ Asks the native recognizer for a new stream and wraps the returned handle
  in a TSherpaOnnxOnlineStream object. }
function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(SherpaOnnxCreateOnlineStream(Handle));
end;
|
||||
|
||||
{ Same as the parameterless overload, but forwards Hotwords as a PAnsiChar to
  SherpaOnnxCreateOnlineStreamWithHotwords. }
function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
var
  NativeStream: Pointer;
  HotwordsPtr: PAnsiChar;
begin
  HotwordsPtr := PAnsiChar(Hotwords);
  NativeStream := SherpaOnnxCreateOnlineStreamWithHotwords(Handle, HotwordsPtr);
  Result := TSherpaOnnxOnlineStream.Create(NativeStream);
end;
|
||||
|
||||
{ Returns True exactly when SherpaOnnxIsOnlineStreamReady reports 1 for the
  given stream. }
function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxIsOnlineStreamReady(Handle, Stream.Handle);
  Result := (Status = 1);
end;
|
||||
|
||||
{ Forwards to SherpaOnnxDecodeOnlineStream with this recognizer's handle and
  the handle of the given stream. }
procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxDecodeOnlineStream(Handle, Stream.Handle);
end;
|
||||
|
||||
{ Forwards to SherpaOnnxOnlineStreamReset with this recognizer's handle and
  the handle of the given stream. }
procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxOnlineStreamReset(Handle, Stream.Handle);
end;
|
||||
|
||||
{ Returns True exactly when SherpaOnnxOnlineStreamIsEndpoint reports 1 for
  the given stream. }
function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxOnlineStreamIsEndpoint(Handle, Stream.Handle);
  Result := (Status = 1);
end;
|
||||
|
||||
{ Fetches the current result of Stream as JSON from the native library and
  converts it into a Pascal record.

  The JSON object is read through its 'text', 'tokens' and 'timestamps'
  members. Both the parsed JSON tree and the native JSON buffer are released
  before returning, also when parsing raises an exception.

  See
   - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html }
function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonArray: TJSONArray;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);

  JsonData := nil;
  try
    JsonData := GetJSON(AnsiString(pJson), False);
    JsonObject := JsonData as TJSONObject;

    Result.Text := JsonObject.Strings['text'];

    JsonArray := JsonObject.Arrays['tokens'];
    SetLength(Result.Tokens, JsonArray.Count);
    I := 0;
    for JsonEnum in JsonArray do
    begin
      Result.Tokens[I] := JsonEnum.Value.AsString;
      Inc(I);
    end;

    JsonArray := JsonObject.Arrays['timestamps'];
    SetLength(Result.Timestamps, JsonArray.Count);
    I := 0;
    for JsonEnum in JsonArray do
    begin
      Result.Timestamps[I] := JsonEnum.Value.AsFloat;
      Inc(I);
    end;
  finally
    { GetJSON hands ownership of the tree to the caller; Free is nil-safe. }
    JsonData.Free;
    SherpaOnnxDestroyOnlineStreamResultJson(pJson);
  end;
end;
|
||||
|
||||
|
||||
{ Wraps an existing native stream handle; P is stored unchanged. }
constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
  Handle := P;
end;
|
||||
|
||||
{ Releases the native stream, if one is held, and then runs the inherited
  destructor. }
destructor TSherpaOnnxOnlineStream.Destroy;
begin
  { The constructor stores whatever pointer it was given, which may be nil. }
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineStream(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
|
||||
|
||||
{ Passes the samples, their count and the sample rate to
  SherpaOnnxOnlineStreamAcceptWaveform for this stream. }
procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
var
  Count: cint32;
begin
  Count := Length(Samples);
  SherpaOnnxOnlineStreamAcceptWaveform(Handle, SampleRate, pcfloat(Samples), Count);
end;
|
||||
|
||||
{ Forwards to SherpaOnnxOnlineStreamInputFinished for this stream. }
procedure TSherpaOnnxOnlineStream.InputFinished;
begin
  SherpaOnnxOnlineStreamInputFinished(Handle);
end;
|
||||
|
||||
end.
|
||||
|
||||
Reference in New Issue
Block a user