Add C API for streaming HLG decoding (#734)

This commit is contained in:
Fangjun Kuang
2024-04-05 10:31:20 +08:00
committed by GitHub
parent db67e00c77
commit dbff2eaadb
39 changed files with 839 additions and 8 deletions

View File

@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -48,5 +50,9 @@ Global
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,66 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do streaming HLG decoding.
//
// 1. Download the model for testing
//
// curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
// rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
//
// 2. Now run it
//
// dotnet run
using SherpaOnnx;
using System.Collections.Generic;
using System;
class StreamingHlgDecodingDemo
{
static void Main(string[] args)
{
var config = new OnlineRecognizerConfig();
config.FeatConfig.SampleRate = 16000;
config.FeatConfig.FeatureDim = 80;
config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
config.ModelConfig.Provider = "cpu";
config.ModelConfig.NumThreads = 1;
config.ModelConfig.Debug = 0;
config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
OnlineRecognizer recognizer = new OnlineRecognizer(config);
var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
WaveReader waveReader = new WaveReader(filename);
OnlineStream s = recognizer.CreateStream();
s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
s.AcceptWaveform(waveReader.SampleRate, tailPadding);
s.InputFinished();
while (recognizer.IsReady(s))
{
recognizer.Decode(s);
}
OnlineRecognizerResult r = recognizer.GetResult(s);
var text = r.Text;
var tokens = r.Tokens;
Console.WriteLine("--------------------");
Console.WriteLine(filename);
Console.WriteLine("text: {0}", text);
Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
Console.Write("timestamps: [");
r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
Console.WriteLine("]");
Console.WriteLine("--------------------");
}
}

View File

@@ -0,0 +1 @@
../online-decode-files/WaveReader.cs

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
set -ex
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi
dotnet run -c Release

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>streaming_hlg_decoding</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>