Inverse text normalization API for other programming languages (#1019)

This commit is contained in:
Fangjun Kuang
2024-06-17 17:02:39 +08:00
committed by GitHub
parent b0f7ed3ee3
commit 6e09933d99
39 changed files with 669 additions and 104 deletions

View File

@@ -48,6 +48,8 @@ func main() {
flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
flag.Parse()

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -ex
if [ ! -d sherpa-onnx-paraformer-zh-2023-03-28 ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi
if [ ! -f ./itn-zh-number.wav ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi
if [ ! -f ./itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
go mod tidy
go build
./non-streaming-decode-files \
--paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
--tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
--model-type paraformer \
--rule-fsts ./itn_zh_number.fst \
--debug 0 \
./itn-zh-number.wav

View File

@@ -14,6 +14,6 @@ go build
./non-streaming-decode-files \
--telespeech-ctc ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
--tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
--model-type telespeech-ctc \
--model-type telespeech_ctc \
--debug 0 \
./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav