Add Go API for homophone replacer (#2168)
This commit is contained in:
98
.github/workflows/test-go.yaml
vendored
98
.github/workflows/test-go.yaml
vendored
@@ -142,32 +142,31 @@ jobs:
|
||||
name: ${{ matrix.os }}-libs
|
||||
path: to-upload/
|
||||
|
||||
- name: Test speech enhancement (GTCRN)
|
||||
- name: Test streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/speech-enhancement-gtcrn/
|
||||
|
||||
./run.sh
|
||||
|
||||
cd scripts/go/_internal/streaming-decode-files
|
||||
ls -lh
|
||||
go mod tidy
|
||||
cat go.mod
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
- name: Test audio tagging
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/audio-tagging/
|
||||
echo "Test zipformer2 CTC"
|
||||
./run-zipformer2-ctc-with-hr.sh
|
||||
./run-zipformer2-ctc.sh
|
||||
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
|
||||
|
||||
./run.sh
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
||||
|
||||
ls -lh
|
||||
./run-transducer-itn.sh
|
||||
rm -rf sherpa-onnx-streaming-*
|
||||
|
||||
- name: Test Keyword spotting
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/keyword-spotting-from-file/
|
||||
|
||||
./run.sh
|
||||
|
||||
ls -lh
|
||||
echo "Test paraformer"
|
||||
./run-paraformer.sh
|
||||
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||
|
||||
- name: Test non-streaming decoding files
|
||||
shell: bash
|
||||
@@ -179,6 +178,11 @@ jobs:
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test SenseVoice ctc"
|
||||
./run-sense-voice-small-with-hr.sh
|
||||
./run-sense-voice-small.sh
|
||||
rm -rf sherpa-onnx-sense-*
|
||||
|
||||
echo "Test Dolphin CTC"
|
||||
./run-dolphin-ctc-base.sh
|
||||
rm -rf sherpa-onnx-dolphin-*
|
||||
@@ -191,10 +195,6 @@ jobs:
|
||||
./run-moonshine.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
echo "Test SenseVoice ctc"
|
||||
./run-sense-voice-small.sh
|
||||
rm -rf sherpa-onnx-sense-*
|
||||
|
||||
echo "Test telespeech ctc"
|
||||
./run-telespeech-ctc.sh
|
||||
rm -rf sherpa-onnx-telespeech-ctc-*
|
||||
@@ -224,6 +224,33 @@ jobs:
|
||||
./run-tdnn-yesno.sh
|
||||
rm -rf sherpa-onnx-tdnn-yesno
|
||||
|
||||
- name: Test speech enhancement (GTCRN)
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/speech-enhancement-gtcrn/
|
||||
|
||||
./run.sh
|
||||
|
||||
ls -lh
|
||||
|
||||
- name: Test audio tagging
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/audio-tagging/
|
||||
|
||||
./run.sh
|
||||
|
||||
ls -lh
|
||||
|
||||
- name: Test Keyword spotting
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/keyword-spotting-from-file/
|
||||
|
||||
./run.sh
|
||||
|
||||
ls -lh
|
||||
|
||||
- name: Test adding punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -301,28 +328,3 @@ jobs:
|
||||
with:
|
||||
name: tts-waves-${{ matrix.os }}
|
||||
path: tts-waves
|
||||
|
||||
- name: Test streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/streaming-decode-files
|
||||
ls -lh
|
||||
go mod tidy
|
||||
cat go.mod
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test zipformer2 CTC"
|
||||
./run-zipformer2-ctc.sh
|
||||
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
|
||||
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
||||
|
||||
./run-transducer-itn.sh
|
||||
rm -rf sherpa-onnx-streaming-*
|
||||
|
||||
echo "Test paraformer"
|
||||
./run-paraformer.sh
|
||||
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||
|
||||
@@ -66,6 +66,10 @@ func main() {
|
||||
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
||||
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
||||
|
||||
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
|
||||
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
|
||||
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if len(flag.Args()) != 1 {
|
||||
|
||||
31
go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
Executable file
31
go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env bash
# Example driver: builds the non-streaming-decode-files Go example and decodes
# ./test-hr.wav with the SenseVoice model, passing the homophone replacer
# options (--hr-dict-dir, --hr-lexicon, --hr-rule-fsts).
|
||||
|
||||
# -e: abort on the first failing command; -x: echo each command before running it.
set -ex
|
||||
|
||||
# Download and unpack the SenseVoice model only if its directory is not already present.
if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||
fi
|
||||
|
||||
# Download the homophone replacer inputs: the dict archive plus replace.fst,
# test-hr.wav and lexicon.txt.
# NOTE(review): only the presence of ./dict is checked, so if one of the later
# downloads fails after dict was extracted, a rerun will not fetch the missing
# files again.
if [ ! -d dict ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
|
||||
tar xf dict.tar.bz2
|
||||
rm dict.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
|
||||
fi
|
||||
|
||||
# Resolve Go module dependencies and build the example binary in the current directory.
go mod tidy
|
||||
go build
|
||||
|
||||
# Decode the test wave with the int8 SenseVoice model and the homophone replacer files.
./non-streaming-decode-files \
|
||||
--sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
|
||||
--tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
|
||||
--debug 1 \
|
||||
--hr-dict-dir ./dict \
|
||||
--hr-lexicon ./lexicon.txt \
|
||||
--hr-rule-fsts ./replace.fst \
|
||||
./test-hr.wav
|
||||
@@ -32,6 +32,9 @@ func main() {
|
||||
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
|
||||
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
||||
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
||||
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
|
||||
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
|
||||
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
|
||||
30
go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
Executable file
30
go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
# Example driver: builds the streaming-decode-files Go example and decodes
# ./test-hr.wav with the streaming zipformer2 CTC model, passing the homophone
# replacer options (--hr-dict-dir, --hr-lexicon, --hr-rule-fsts).
|
||||
|
||||
# -e: abort on the first failing command; -x: echo each command before running it.
set -ex
|
||||
|
||||
# Download and unpack the streaming zipformer CTC model only if its directory is not already present.
if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||
fi
|
||||
|
||||
# Download the homophone replacer inputs: the dict archive plus replace.fst,
# test-hr.wav and lexicon.txt.
# NOTE(review): only the presence of ./dict is checked, so if one of the later
# downloads fails after dict was extracted, a rerun will not fetch the missing
# files again.
if [ ! -d dict ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
|
||||
tar xf dict.tar.bz2
|
||||
rm dict.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
|
||||
fi
|
||||
|
||||
# Resolve Go module dependencies and build the example binary in the current directory.
go mod tidy
|
||||
go build
|
||||
|
||||
# Decode the test wave with the streaming CTC model and the homophone replacer files.
./streaming-decode-files \
|
||||
--zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
|
||||
--tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
|
||||
--hr-dict-dir ./dict \
|
||||
--hr-lexicon ./lexicon.txt \
|
||||
--hr-rule-fsts ./replace.fst \
|
||||
./test-hr.wav
|
||||
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
|
||||
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
|
||||
@@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct {
|
||||
MaxActive int
|
||||
}
|
||||
|
||||
// HomophoneReplacerConfig holds the file paths that configure the homophone
// replacer. Each field is passed to the C API as a C string.
// NOTE(review): an empty string presumably disables the corresponding input;
// confirm against the C implementation.
type HomophoneReplacerConfig struct {
|
||||
// DictDir is the path to the jieba dict directory.
DictDir string
|
||||
// Lexicon is the path to lexicon.txt.
Lexicon string
|
||||
// RuleFsts is the path to the replacement rule FST (e.g. replace.fst).
RuleFsts string
|
||||
}
|
||||
|
||||
// Configuration for the online/streaming recognizer.
|
||||
type OnlineRecognizerConfig struct {
|
||||
FeatConfig FeatureConfig
|
||||
@@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct {
|
||||
RuleFars string
|
||||
HotwordsBuf string
|
||||
HotwordsBufSize int
|
||||
Hr HomophoneReplacerConfig
|
||||
}
|
||||
|
||||
// It contains the recognition result for a online stream.
|
||||
@@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
|
||||
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
|
||||
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
|
||||
|
||||
c.hr.dict_dir = C.CString(config.Hr.DictDir)
|
||||
defer C.free(unsafe.Pointer(c.hr.dict_dir))
|
||||
|
||||
c.hr.lexicon = C.CString(config.Hr.Lexicon)
|
||||
defer C.free(unsafe.Pointer(c.hr.lexicon))
|
||||
|
||||
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
|
||||
defer C.free(unsafe.Pointer(c.hr.rule_fsts))
|
||||
|
||||
impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
|
||||
if impl == nil {
|
||||
return nil
|
||||
@@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct {
|
||||
BlankPenalty float32
|
||||
RuleFsts string
|
||||
RuleFars string
|
||||
Hr HomophoneReplacerConfig
|
||||
}
|
||||
|
||||
// It wraps a pointer from C
|
||||
@@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
|
||||
|
||||
c.rule_fsts = C.CString(config.RuleFsts)
|
||||
c.rule_fars = C.CString(config.RuleFars)
|
||||
|
||||
c.hr.dict_dir = C.CString(config.Hr.DictDir)
|
||||
c.hr.lexicon = C.CString(config.Hr.Lexicon)
|
||||
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
|
||||
return &c
|
||||
}
|
||||
func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
|
||||
@@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
|
||||
C.free(unsafe.Pointer(c.rule_fsts))
|
||||
c.rule_fsts = nil
|
||||
}
|
||||
|
||||
if c.rule_fars != nil {
|
||||
C.free(unsafe.Pointer(c.rule_fars))
|
||||
c.rule_fars = nil
|
||||
}
|
||||
|
||||
if c.hr.dict_dir != nil {
|
||||
C.free(unsafe.Pointer(c.hr.dict_dir))
|
||||
c.hr.dict_dir = nil
|
||||
}
|
||||
|
||||
if c.hr.lexicon != nil {
|
||||
C.free(unsafe.Pointer(c.hr.lexicon))
|
||||
c.hr.lexicon = nil
|
||||
}
|
||||
|
||||
if c.hr.rule_fsts != nil {
|
||||
C.free(unsafe.Pointer(c.hr.rule_fsts))
|
||||
c.hr.rule_fsts = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Frees the internal pointer of the recognition to avoid memory leak.
|
||||
|
||||
Reference in New Issue
Block a user