Add Go API for SenseVoice (#1154)
This commit is contained in:
4
.github/workflows/test-go.yaml
vendored
4
.github/workflows/test-go.yaml
vendored
@@ -191,6 +191,10 @@ jobs:
|
|||||||
go build
|
go build
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test SenseVoice ctc"
|
||||||
|
./run-sense-voice-small.sh
|
||||||
|
rm -rf sherpa-onnx-sense-*
|
||||||
|
|
||||||
echo "Test telespeech ctc"
|
echo "Test telespeech ctc"
|
||||||
./run-telespeech-ctc.sh
|
./run-telespeech-ctc.sh
|
||||||
rm -rf sherpa-onnx-telespeech-ctc-*
|
rm -rf sherpa-onnx-telespeech-ctc-*
|
||||||
|
|||||||
@@ -35,6 +35,10 @@ func main() {
|
|||||||
|
|
||||||
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
|
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
|
||||||
|
|
||||||
|
flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
|
||||||
|
flag.StringVar(&config.ModelConfig.SenseVoice.Language, "sense-voice-language", "", "If not empty, specify the Language for the input wave")
|
||||||
|
flag.IntVar(&config.ModelConfig.SenseVoice.UseInverseTextNormalization, "sense-voice-use-itn", 1, " 1 to use inverse text normalization")
|
||||||
|
|
||||||
flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
|
flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
|
||||||
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
|
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
|
||||||
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
|
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
|
||||||
|
|||||||
18
go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh
Executable file
18
go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh
Executable file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds the non-streaming-decode-files Go example and runs it on a test wave
# using the SenseVoice model. The model archive is downloaded on first use.

set -ex

# Name of the released model; used for the archive, the directory and the URL.
model_dir=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17

# Download and unpack the model only when it is not already present.
if [ ! -d "$model_dir" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page as the archive, so the failure is reported here and not by tar.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2"
  tar xvf "$model_dir.tar.bz2"
  rm "$model_dir.tar.bz2"
fi

go mod tidy
go build

# Decode the Chinese test wave shipped inside the model archive.
./non-streaming-decode-files \
  --sense-voice-model "./$model_dir/model.int8.onnx" \
  --tokens "./$model_dir/tokens.txt" \
  --debug 0 \
  "./$model_dir/test_wavs/zh.wav"
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh
|
||||||
@@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct {
|
|||||||
Model string
|
Model string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Configuration for offline SenseVoice models.
type OfflineSenseVoiceModelConfig struct {
	Model string // Path to the SenseVoice model

	// If not empty, it specifies the language of the input wave.
	Language string

	// 1 to use inverse text normalization. It is an int rather than a bool
	// because the value is handed to the C API as an int (use_itn).
	UseInverseTextNormalization int
}
|
||||||
|
|
||||||
// Configuration for offline LM.
|
// Configuration for offline LM.
|
||||||
type OfflineLMConfig struct {
|
type OfflineLMConfig struct {
|
||||||
Model string // Path to the model
|
Model string // Path to the model
|
||||||
@@ -382,6 +388,7 @@ type OfflineModelConfig struct {
|
|||||||
NemoCTC OfflineNemoEncDecCtcModelConfig
|
NemoCTC OfflineNemoEncDecCtcModelConfig
|
||||||
Whisper OfflineWhisperModelConfig
|
Whisper OfflineWhisperModelConfig
|
||||||
Tdnn OfflineTdnnModelConfig
|
Tdnn OfflineTdnnModelConfig
|
||||||
|
SenseVoice OfflineSenseVoiceModelConfig
|
||||||
Tokens string // Path to tokens.txt
|
Tokens string // Path to tokens.txt
|
||||||
|
|
||||||
// Number of threads to use for neural network computation
|
// Number of threads to use for neural network computation
|
||||||
@@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
|
|||||||
c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)
|
c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)
|
||||||
defer C.free(unsafe.Pointer(c.model_config.tdnn.model))
|
defer C.free(unsafe.Pointer(c.model_config.tdnn.model))
|
||||||
|
|
||||||
|
c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model)
|
||||||
|
defer C.free(unsafe.Pointer(c.model_config.sense_voice.model))
|
||||||
|
|
||||||
|
c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language)
|
||||||
|
defer C.free(unsafe.Pointer(c.model_config.sense_voice.language))
|
||||||
|
|
||||||
|
c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization)
|
||||||
|
|
||||||
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
||||||
defer C.free(unsafe.Pointer(c.model_config.tokens))
|
defer C.free(unsafe.Pointer(c.model_config.tokens))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user