Add C API for streaming HLG decoding (#734)

This commit is contained in:
Fangjun Kuang
2024-04-05 10:31:20 +08:00
committed by GitHub
parent db67e00c77
commit dbff2eaadb
39 changed files with 839 additions and 8 deletions

View File

@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project for the streaming_hlg_decoding program; it is built as an executable targeting .NET 6. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>streaming_hlg_decoding</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!--
  Package restore looks in /tmp/packages first, then in any sources that are
  already configured, then on nuget.org.
  NOTE(review): /tmp/packages is presumably where a locally built
  org.k2fsa.sherpa.onnx package is dropped for testing; confirm against the
  build scripts.
-->
<PropertyGroup>
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>
<!-- Version="*" is a floating version: the package version is not pinned here and is resolved at restore time. -->
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>

View File

@@ -116,6 +116,21 @@ namespace SherpaOnnx
public int FeatureDim;
}
/// Settings for the online CTC FST decoder.
/// The fields are laid out sequentially, so their declaration order
/// (Graph, then MaxActive) must not be changed.
[StructLayout(LayoutKind.Sequential)]
public struct OnlineCtcFstDecoderConfig
{
    /// Creates a config with an empty Graph and MaxActive set to 3000.
    public OnlineCtcFstDecoderConfig()
    {
        this.MaxActive = 3000;
        this.Graph = "";
    }

    /// Graph string, marshaled to native code as an LPStr.
    /// NOTE(review): presumably the path of the decoding graph file; confirm against the C API.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Graph;

    /// NOTE(review): presumably the maximum number of active states kept while decoding; confirm against the C API.
    public int MaxActive;
}
[StructLayout(LayoutKind.Sequential)]
public struct OnlineRecognizerConfig
{
@@ -131,6 +146,7 @@ namespace SherpaOnnx
Rule3MinUtteranceLength = 20.0F;
HotwordsFile = "";
HotwordsScore = 1.5F;
CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
}
public FeatureConfig FeatConfig;
public OnlineModelConfig ModelConfig;
@@ -167,6 +183,8 @@ namespace SherpaOnnx
/// Bonus score for each token in hotwords.
public float HotwordsScore;
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
}
public class OnlineRecognizerResult

View File

@@ -0,0 +1 @@
streaming-hlg-decoding

View File

@@ -0,0 +1,5 @@
// Module definition for the streaming-hlg-decoding program.
module streaming-hlg-decoding
go 1.12
// Resolve the sherpa_onnx package from the parent directory instead of from
// its published module path.
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../

View File

@@ -0,0 +1 @@
../../../../go-api-examples/streaming-hlg-decoding/main.go

View File

@@ -0,0 +1 @@
../../../../go-api-examples/streaming-hlg-decoding/run.sh

View File

@@ -99,6 +99,11 @@ type FeatureConfig struct {
FeatureDim int
}
// OnlineCtcFstDecoderConfig holds the CTC FST decoder options of an
// OnlineRecognizerConfig. NewOnlineRecognizer copies both fields into the
// ctc_fst_decoder_config member of the C configuration struct.
type OnlineCtcFstDecoderConfig struct {
	// Graph is handed to the C API as a C string.
	// NOTE(review): presumably the path of the decoding graph file (e.g. an
	// HLG FST); confirm against the C API header.
	Graph string

	// MaxActive is handed to the C API as a C int.
	// NOTE(review): presumably the maximum number of active states kept while
	// decoding; confirm against the C API header.
	MaxActive int
}
// Configuration for the online/streaming recognizer.
type OnlineRecognizerConfig struct {
FeatConfig FeatureConfig
@@ -120,6 +125,7 @@ type OnlineRecognizerConfig struct {
Rule1MinTrailingSilence float32
Rule2MinTrailingSilence float32
Rule3MinUtteranceLength float32
CtcFstDecoderConfig OnlineCtcFstDecoderConfig
}
// It contains the recognition result for an online stream.
@@ -190,6 +196,10 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
recognizer := &OnlineRecognizer{}
recognizer.impl = C.CreateOnlineRecognizer(&c)