Add Go API for homophone replacer (#2168)

This commit is contained in:
Fangjun Kuang
2025-04-30 23:47:38 +08:00
committed by GitHub
parent 85df96d528
commit fcb4c4eb2c
8 changed files with 157 additions and 48 deletions

View File

@@ -66,6 +66,10 @@ func main() {
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homonphone replacer")
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homonphone replacer")
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homonphone replacer")
flag.Parse()
if len(flag.Args()) != 1 {

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -ex
if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi
if [ ! -d dict ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
fi
go mod tidy
go build
./non-streaming-decode-files \
--sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
--tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
--debug 1 \
--hr-dict-dir ./dict \
--hr-lexicon ./lexicon.txt \
--hr-rule-fsts ./replace.fst \
./test-hr.wav

View File

@@ -32,6 +32,9 @@ func main() {
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homonphone replacer")
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homonphone replacer")
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homonphone replacer")
flag.Parse()

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
set -ex
if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
fi
if [ ! -d dict ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
fi
go mod tidy
go build
./streaming-decode-files \
--zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
--tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
--hr-dict-dir ./dict \
--hr-lexicon ./lexicon.txt \
--hr-rule-fsts ./replace.fst \
./test-hr.wav