Add Go API for homophone replacer (#2168)
This commit is contained in:
98
.github/workflows/test-go.yaml
vendored
98
.github/workflows/test-go.yaml
vendored
@@ -142,32 +142,31 @@ jobs:
|
|||||||
name: ${{ matrix.os }}-libs
|
name: ${{ matrix.os }}-libs
|
||||||
path: to-upload/
|
path: to-upload/
|
||||||
|
|
||||||
- name: Test speech enhancement (GTCRN)
|
- name: Test streaming decoding files
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd scripts/go/_internal/speech-enhancement-gtcrn/
|
cd scripts/go/_internal/streaming-decode-files
|
||||||
|
ls -lh
|
||||||
./run.sh
|
go mod tidy
|
||||||
|
cat go.mod
|
||||||
|
go build
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
- name: Test audio tagging
|
echo "Test zipformer2 CTC"
|
||||||
shell: bash
|
./run-zipformer2-ctc-with-hr.sh
|
||||||
run: |
|
./run-zipformer2-ctc.sh
|
||||||
cd scripts/go/_internal/audio-tagging/
|
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
|
||||||
|
|
||||||
./run.sh
|
echo "Test transducer"
|
||||||
|
./run-transducer.sh
|
||||||
|
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
||||||
|
|
||||||
ls -lh
|
./run-transducer-itn.sh
|
||||||
|
rm -rf sherpa-onnx-streaming-*
|
||||||
|
|
||||||
- name: Test Keyword spotting
|
echo "Test paraformer"
|
||||||
shell: bash
|
./run-paraformer.sh
|
||||||
run: |
|
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
||||||
cd scripts/go/_internal/keyword-spotting-from-file/
|
|
||||||
|
|
||||||
./run.sh
|
|
||||||
|
|
||||||
ls -lh
|
|
||||||
|
|
||||||
- name: Test non-streaming decoding files
|
- name: Test non-streaming decoding files
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -179,6 +178,11 @@ jobs:
|
|||||||
go build
|
go build
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test SenseVoice ctc"
|
||||||
|
./run-sense-voice-small-with-hr.sh
|
||||||
|
./run-sense-voice-small.sh
|
||||||
|
rm -rf sherpa-onnx-sense-*
|
||||||
|
|
||||||
echo "Test Dolphin CTC"
|
echo "Test Dolphin CTC"
|
||||||
./run-dolphin-ctc-base.sh
|
./run-dolphin-ctc-base.sh
|
||||||
rm -rf sherpa-onnx-dolphin-*
|
rm -rf sherpa-onnx-dolphin-*
|
||||||
@@ -191,10 +195,6 @@ jobs:
|
|||||||
./run-moonshine.sh
|
./run-moonshine.sh
|
||||||
rm -rf sherpa-onnx-*
|
rm -rf sherpa-onnx-*
|
||||||
|
|
||||||
echo "Test SenseVoice ctc"
|
|
||||||
./run-sense-voice-small.sh
|
|
||||||
rm -rf sherpa-onnx-sense-*
|
|
||||||
|
|
||||||
echo "Test telespeech ctc"
|
echo "Test telespeech ctc"
|
||||||
./run-telespeech-ctc.sh
|
./run-telespeech-ctc.sh
|
||||||
rm -rf sherpa-onnx-telespeech-ctc-*
|
rm -rf sherpa-onnx-telespeech-ctc-*
|
||||||
@@ -224,6 +224,33 @@ jobs:
|
|||||||
./run-tdnn-yesno.sh
|
./run-tdnn-yesno.sh
|
||||||
rm -rf sherpa-onnx-tdnn-yesno
|
rm -rf sherpa-onnx-tdnn-yesno
|
||||||
|
|
||||||
|
- name: Test speech enhancement (GTCRN)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd scripts/go/_internal/speech-enhancement-gtcrn/
|
||||||
|
|
||||||
|
./run.sh
|
||||||
|
|
||||||
|
ls -lh
|
||||||
|
|
||||||
|
- name: Test audio tagging
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd scripts/go/_internal/audio-tagging/
|
||||||
|
|
||||||
|
./run.sh
|
||||||
|
|
||||||
|
ls -lh
|
||||||
|
|
||||||
|
- name: Test Keyword spotting
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd scripts/go/_internal/keyword-spotting-from-file/
|
||||||
|
|
||||||
|
./run.sh
|
||||||
|
|
||||||
|
ls -lh
|
||||||
|
|
||||||
- name: Test adding punctuation
|
- name: Test adding punctuation
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -301,28 +328,3 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: tts-waves-${{ matrix.os }}
|
name: tts-waves-${{ matrix.os }}
|
||||||
path: tts-waves
|
path: tts-waves
|
||||||
|
|
||||||
- name: Test streaming decoding files
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
cd scripts/go/_internal/streaming-decode-files
|
|
||||||
ls -lh
|
|
||||||
go mod tidy
|
|
||||||
cat go.mod
|
|
||||||
go build
|
|
||||||
ls -lh
|
|
||||||
|
|
||||||
echo "Test zipformer2 CTC"
|
|
||||||
./run-zipformer2-ctc.sh
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
|
|
||||||
|
|
||||||
echo "Test transducer"
|
|
||||||
./run-transducer.sh
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
|
|
||||||
|
|
||||||
./run-transducer-itn.sh
|
|
||||||
rm -rf sherpa-onnx-streaming-*
|
|
||||||
|
|
||||||
echo "Test paraformer"
|
|
||||||
./run-paraformer.sh
|
|
||||||
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
|
|
||||||
|
|||||||
@@ -66,6 +66,10 @@ func main() {
|
|||||||
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
||||||
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
||||||
|
|
||||||
|
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
|
||||||
|
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
|
||||||
|
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
|
||||||
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if len(flag.Args()) != 1 {
|
if len(flag.Args()) != 1 {
|
||||||
|
|||||||
31
go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
Executable file
31
go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
Executable file
@@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -d dict ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
|
||||||
|
tar xf dict.tar.bz2
|
||||||
|
rm dict.tar.bz2
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
go mod tidy
|
||||||
|
go build
|
||||||
|
|
||||||
|
./non-streaming-decode-files \
|
||||||
|
--sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
|
||||||
|
--tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
|
||||||
|
--debug 1 \
|
||||||
|
--hr-dict-dir ./dict \
|
||||||
|
--hr-lexicon ./lexicon.txt \
|
||||||
|
--hr-rule-fsts ./replace.fst \
|
||||||
|
./test-hr.wav
|
||||||
@@ -32,6 +32,9 @@ func main() {
|
|||||||
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
|
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
|
||||||
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
|
||||||
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
|
||||||
|
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
|
||||||
|
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
|
||||||
|
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
|
||||||
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
|||||||
30
go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
Executable file
30
go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -d dict ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
|
||||||
|
tar xf dict.tar.bz2
|
||||||
|
rm dict.tar.bz2
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
go mod tidy
|
||||||
|
go build
|
||||||
|
|
||||||
|
./streaming-decode-files \
|
||||||
|
--zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
|
||||||
|
--tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
|
||||||
|
--hr-dict-dir ./dict \
|
||||||
|
--hr-lexicon ./lexicon.txt \
|
||||||
|
--hr-rule-fsts ./replace.fst \
|
||||||
|
./test-hr.wav
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
|
||||||
@@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct {
|
|||||||
MaxActive int
|
MaxActive int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type HomophoneReplacerConfig struct {
|
||||||
|
DictDir string
|
||||||
|
Lexicon string
|
||||||
|
RuleFsts string
|
||||||
|
}
|
||||||
|
|
||||||
// Configuration for the online/streaming recognizer.
|
// Configuration for the online/streaming recognizer.
|
||||||
type OnlineRecognizerConfig struct {
|
type OnlineRecognizerConfig struct {
|
||||||
FeatConfig FeatureConfig
|
FeatConfig FeatureConfig
|
||||||
@@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct {
|
|||||||
RuleFars string
|
RuleFars string
|
||||||
HotwordsBuf string
|
HotwordsBuf string
|
||||||
HotwordsBufSize int
|
HotwordsBufSize int
|
||||||
|
Hr HomophoneReplacerConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// It contains the recognition result for an online stream.
|
// It contains the recognition result for an online stream.
|
||||||
@@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
|
|||||||
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
|
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
|
||||||
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
|
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
|
||||||
|
|
||||||
|
c.hr.dict_dir = C.CString(config.Hr.DictDir)
|
||||||
|
defer C.free(unsafe.Pointer(c.hr.dict_dir))
|
||||||
|
|
||||||
|
c.hr.lexicon = C.CString(config.Hr.Lexicon)
|
||||||
|
defer C.free(unsafe.Pointer(c.hr.lexicon))
|
||||||
|
|
||||||
|
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
|
||||||
|
defer C.free(unsafe.Pointer(c.hr.rule_fsts))
|
||||||
|
|
||||||
impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
|
impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
|
||||||
if impl == nil {
|
if impl == nil {
|
||||||
return nil
|
return nil
|
||||||
@@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct {
|
|||||||
BlankPenalty float32
|
BlankPenalty float32
|
||||||
RuleFsts string
|
RuleFsts string
|
||||||
RuleFars string
|
RuleFars string
|
||||||
|
Hr HomophoneReplacerConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// It wraps a pointer from C
|
// It wraps a pointer from C
|
||||||
@@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
|
|||||||
|
|
||||||
c.rule_fsts = C.CString(config.RuleFsts)
|
c.rule_fsts = C.CString(config.RuleFsts)
|
||||||
c.rule_fars = C.CString(config.RuleFars)
|
c.rule_fars = C.CString(config.RuleFars)
|
||||||
|
|
||||||
|
c.hr.dict_dir = C.CString(config.Hr.DictDir)
|
||||||
|
c.hr.lexicon = C.CString(config.Hr.Lexicon)
|
||||||
|
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
|
func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
|
||||||
@@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
|
|||||||
C.free(unsafe.Pointer(c.rule_fsts))
|
C.free(unsafe.Pointer(c.rule_fsts))
|
||||||
c.rule_fsts = nil
|
c.rule_fsts = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.rule_fars != nil {
|
if c.rule_fars != nil {
|
||||||
C.free(unsafe.Pointer(c.rule_fars))
|
C.free(unsafe.Pointer(c.rule_fars))
|
||||||
c.rule_fars = nil
|
c.rule_fars = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.hr.dict_dir != nil {
|
||||||
|
C.free(unsafe.Pointer(c.hr.dict_dir))
|
||||||
|
c.hr.dict_dir = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.hr.lexicon != nil {
|
||||||
|
C.free(unsafe.Pointer(c.hr.lexicon))
|
||||||
|
c.hr.lexicon = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.hr.rule_fsts != nil {
|
||||||
|
C.free(unsafe.Pointer(c.hr.rule_fsts))
|
||||||
|
c.hr.rule_fsts = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Frees the internal pointer of the recognition to avoid memory leak.
|
// Frees the internal pointer of the recognition to avoid memory leak.
|
||||||
|
|||||||
Reference in New Issue
Block a user