diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 9f6a4bc3..d6bff9b1 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -142,32 +142,31 @@ jobs: name: ${{ matrix.os }}-libs path: to-upload/ - - name: Test speech enhancement (GTCRN) + - name: Test streaming decoding files shell: bash run: | - cd scripts/go/_internal/speech-enhancement-gtcrn/ - - ./run.sh - + cd scripts/go/_internal/streaming-decode-files + ls -lh + go mod tidy + cat go.mod + go build ls -lh - - name: Test audio tagging - shell: bash - run: | - cd scripts/go/_internal/audio-tagging/ + echo "Test zipformer2 CTC" + ./run-zipformer2-ctc-with-hr.sh + ./run-zipformer2-ctc.sh + rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 - ./run.sh + echo "Test transducer" + ./run-transducer.sh + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 - ls -lh + ./run-transducer-itn.sh + rm -rf sherpa-onnx-streaming-* - - name: Test Keyword spotting - shell: bash - run: | - cd scripts/go/_internal/keyword-spotting-from-file/ - - ./run.sh - - ls -lh + echo "Test paraformer" + ./run-paraformer.sh + rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en - name: Test non-streaming decoding files shell: bash @@ -179,6 +178,11 @@ jobs: go build ls -lh + echo "Test SenseVoice ctc" + ./run-sense-voice-small-with-hr.sh + ./run-sense-voice-small.sh + rm -rf sherpa-onnx-sense-* + echo "Test Dolphin CTC" ./run-dolphin-ctc-base.sh rm -rf sherpa-onnx-dolphin-* @@ -191,10 +195,6 @@ jobs: ./run-moonshine.sh rm -rf sherpa-onnx-* - echo "Test SenseVoice ctc" - ./run-sense-voice-small.sh - rm -rf sherpa-onnx-sense-* - echo "Test telespeech ctc" ./run-telespeech-ctc.sh rm -rf sherpa-onnx-telespeech-ctc-* @@ -224,6 +224,33 @@ jobs: ./run-tdnn-yesno.sh rm -rf sherpa-onnx-tdnn-yesno + - name: Test speech enhancement (GTCRN) + shell: bash + run: | + cd scripts/go/_internal/speech-enhancement-gtcrn/ + + ./run.sh + + ls -lh + + - name: Test audio tagging + 
shell: bash + run: | + cd scripts/go/_internal/audio-tagging/ + + ./run.sh + + ls -lh + + - name: Test Keyword spotting + shell: bash + run: | + cd scripts/go/_internal/keyword-spotting-from-file/ + + ./run.sh + + ls -lh + - name: Test adding punctuation shell: bash run: | @@ -301,28 +328,3 @@ jobs: with: name: tts-waves-${{ matrix.os }} path: tts-waves - - - name: Test streaming decoding files - shell: bash - run: | - cd scripts/go/_internal/streaming-decode-files - ls -lh - go mod tidy - cat go.mod - go build - ls -lh - - echo "Test zipformer2 CTC" - ./run-zipformer2-ctc.sh - rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 - - echo "Test transducer" - ./run-transducer.sh - rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 - - ./run-transducer-itn.sh - rm -rf sherpa-onnx-streaming-* - - echo "Test paraformer" - ./run-paraformer.sh - rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go index 4da40d91..dc9bbb1a 100644 --- a/go-api-examples/non-streaming-decode-files/main.go +++ b/go-api-examples/non-streaming-decode-files/main.go @@ -66,6 +66,10 @@ func main() { flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer") + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer") + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer") + flag.Parse() if len(flag.Args()) != 1 { diff --git a/go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh 
b/go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh new file mode 100755 index 00000000..067579cb --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +fi + +if [ ! -d dict ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 + tar xf dict.tar.bz2 + rm dict.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt +fi + +go mod tidy +go build + +./non-streaming-decode-files \ + --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ + --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \ + --debug 1 \ + --hr-dict-dir ./dict \ + --hr-lexicon ./lexicon.txt \ + --hr-rule-fsts ./replace.fst \ + ./test-hr.wav diff --git a/go-api-examples/streaming-decode-files/main.go b/go-api-examples/streaming-decode-files/main.go index d96b5333..f49b2ab6 100644 --- a/go-api-examples/streaming-decode-files/main.go +++ b/go-api-examples/streaming-decode-files/main.go @@ -32,6 +32,9 @@ func main() { flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst 
archives for inverse text normalization") + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer") + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer") + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer") flag.Parse() diff --git a/go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh b/go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh new file mode 100755 index 00000000..ff46922b --- /dev/null +++ b/go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 + rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 +fi + +if [ ! 
-d dict ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 + tar xf dict.tar.bz2 + rm dict.tar.bz2 + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt +fi + +go mod tidy +go build + +./streaming-decode-files \ + --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \ + --tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \ + --hr-dict-dir ./dict \ + --hr-lexicon ./lexicon.txt \ + --hr-rule-fsts ./replace.fst \ + ./test-hr.wav diff --git a/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small-with-hr.sh b/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small-with-hr.sh new file mode 120000 index 00000000..c10b193e --- /dev/null +++ b/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small-with-hr.sh @@ -0,0 +1 @@ +../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh \ No newline at end of file diff --git a/scripts/go/_internal/streaming-decode-files/run-zipformer2-ctc-with-hr.sh b/scripts/go/_internal/streaming-decode-files/run-zipformer2-ctc-with-hr.sh new file mode 120000 index 00000000..a71856fd --- /dev/null +++ b/scripts/go/_internal/streaming-decode-files/run-zipformer2-ctc-with-hr.sh @@ -0,0 +1 @@ +../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh \ No newline at end of file diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index 613cda2d..28e877f9 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct { MaxActive int } +type HomophoneReplacerConfig struct { + DictDir string + Lexicon string + 
RuleFsts string +} + // Configuration for the online/streaming recognizer. type OnlineRecognizerConfig struct { FeatConfig FeatureConfig @@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct { RuleFars string HotwordsBuf string HotwordsBufSize int + Hr HomophoneReplacerConfig } // It contains the recognition result for a online stream. @@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) + c.hr.dict_dir = C.CString(config.Hr.DictDir) + defer C.free(unsafe.Pointer(c.hr.dict_dir)) + + c.hr.lexicon = C.CString(config.Hr.Lexicon) + defer C.free(unsafe.Pointer(c.hr.lexicon)) + + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts) + defer C.free(unsafe.Pointer(c.hr.rule_fsts)) + impl := C.SherpaOnnxCreateOnlineRecognizer(&c) if impl == nil { return nil @@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct { BlankPenalty float32 RuleFsts string RuleFars string + Hr HomophoneReplacerConfig } // It wraps a pointer from C @@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher c.rule_fsts = C.CString(config.RuleFsts) c.rule_fars = C.CString(config.RuleFars) + + c.hr.dict_dir = C.CString(config.Hr.DictDir) + c.hr.lexicon = C.CString(config.Hr.Lexicon) + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts) return &c } func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) { @@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) C.free(unsafe.Pointer(c.rule_fsts)) c.rule_fsts = nil } + if c.rule_fars != nil { C.free(unsafe.Pointer(c.rule_fars)) c.rule_fars = nil } + + if c.hr.dict_dir != nil { + C.free(unsafe.Pointer(c.hr.dict_dir)) + c.hr.dict_dir = nil + } + + if c.hr.lexicon != nil { + C.free(unsafe.Pointer(c.hr.lexicon)) + c.hr.lexicon = nil + } + + if c.hr.rule_fsts != nil { + 
C.free(unsafe.Pointer(c.hr.rule_fsts)) + c.hr.rule_fsts = nil + } } // Frees the internal pointer of the recognition to avoid memory leak.