Add Go API for Moonshine models (#1479)
This commit is contained in:
90
.github/workflows/test-go.yaml
vendored
90
.github/workflows/test-go.yaml
vendored
@@ -134,6 +134,53 @@ jobs:
|
||||
name: ${{ matrix.os }}-libs
|
||||
path: to-upload/
|
||||
|
||||
- name: Test non-streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/non-streaming-decode-files/
|
||||
ls -lh
|
||||
go mod tidy
|
||||
cat go.mod
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test Moonshine"
|
||||
./run-moonshine.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
echo "Test SenseVoice ctc"
|
||||
./run-sense-voice-small.sh
|
||||
rm -rf sherpa-onnx-sense-*
|
||||
|
||||
echo "Test telespeech ctc"
|
||||
./run-telespeech-ctc.sh
|
||||
rm -rf sherpa-onnx-telespeech-ctc-*
|
||||
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-zipformer-en-2023-06-26
|
||||
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-zipformer-en-2023-06-26
|
||||
|
||||
echo "Test paraformer"
|
||||
./run-paraformer.sh
|
||||
./run-paraformer-itn.sh
|
||||
rm -rf sherpa-onnx-paraformer-zh-2023-09-14
|
||||
|
||||
echo "Test NeMo CTC"
|
||||
./run-nemo-ctc.sh
|
||||
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
|
||||
|
||||
echo "Test Whisper tiny.en"
|
||||
./run-whisper.sh
|
||||
rm -rf sherpa-onnx-whisper-tiny.en
|
||||
|
||||
echo "Test Tdnn yesno"
|
||||
./run-tdnn-yesno.sh
|
||||
rm -rf sherpa-onnx-tdnn-yesno
|
||||
|
||||
- name: Test adding punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -193,49 +240,6 @@ jobs:
|
||||
name: tts-waves-${{ matrix.os }}
|
||||
path: tts-waves
|
||||
|
||||
- name: Test non-streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
cd scripts/go/_internal/non-streaming-decode-files/
|
||||
ls -lh
|
||||
go mod tidy
|
||||
cat go.mod
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test SenseVoice ctc"
|
||||
./run-sense-voice-small.sh
|
||||
rm -rf sherpa-onnx-sense-*
|
||||
|
||||
echo "Test telespeech ctc"
|
||||
./run-telespeech-ctc.sh
|
||||
rm -rf sherpa-onnx-telespeech-ctc-*
|
||||
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-zipformer-en-2023-06-26
|
||||
|
||||
echo "Test transducer"
|
||||
./run-transducer.sh
|
||||
rm -rf sherpa-onnx-zipformer-en-2023-06-26
|
||||
|
||||
echo "Test paraformer"
|
||||
./run-paraformer.sh
|
||||
./run-paraformer-itn.sh
|
||||
rm -rf sherpa-onnx-paraformer-zh-2023-09-14
|
||||
|
||||
echo "Test NeMo CTC"
|
||||
./run-nemo-ctc.sh
|
||||
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
|
||||
|
||||
echo "Test Whisper tiny.en"
|
||||
./run-whisper.sh
|
||||
rm -rf sherpa-onnx-whisper-tiny.en
|
||||
|
||||
echo "Test Tdnn yesno"
|
||||
./run-tdnn-yesno.sh
|
||||
rm -rf sherpa-onnx-tdnn-yesno
|
||||
|
||||
- name: Test streaming decoding files
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -6,28 +6,41 @@ Please refer to the documentation
|
||||
https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
|
||||
for details.
|
||||
|
||||
- [./add-punctuation](./add-punctuation) It shows how to use
|
||||
a punctuation model to add punctuation to text
|
||||
|
||||
- [./non-streaming-decode-files](./non-streaming-decode-files) It shows how to use
|
||||
a non-streaming ASR model to decode files
|
||||
|
||||
- [./non-streaming-speaker-diarization](./non-streaming-speaker-diarization) It shows how to use
|
||||
a speaker segmentation model and a speaker embedding model for speaker diarization.
|
||||
|
||||
- [./non-streaming-tts](./non-streaming-tts) It shows how to use a non-streaming TTS
|
||||
model to convert text to speech
|
||||
|
||||
- [./real-time-speech-recognition-from-microphone](./real-time-speech-recognition-from-microphone)
|
||||
It shows how to use a streaming ASR model to recognize speech from a microphone in real-time
|
||||
|
||||
- [./speaker-identification](./speaker-identification) It shows how to use a speaker
|
||||
embedding model for speaker identification.
|
||||
|
||||
- [./streaming-decode-files](./streaming-decode-files) It shows how to use a streaming
|
||||
model for streaming speech recognition
|
||||
|
||||
- [./streaming-hlg-decoding](./streaming-hlg-decoding) It shows how to use a streaming
|
||||
model for streaming speech recognition with HLG decoding
|
||||
|
||||
- [./vad](./vad) It shows how to use silero VAD with Golang.
|
||||
|
||||
- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
|
||||
- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
|
||||
for speech recognition.
|
||||
|
||||
- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
|
||||
- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper
|
||||
|
||||
- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use the Go API for VAD + speaker identification.
|
||||
for speech recognition.
|
||||
|
||||
- [./vad-spoken-language-identification](./vad-spoken-language-identification) It shows how to use silero VAD + Whisper
|
||||
for spoken language identification.
|
||||
|
||||
- [./speaker-identification](./speaker-identification) It shows how to use the Go API for speaker identification.
|
||||
|
||||
- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use the Go API for VAD + speaker identification.
|
||||
|
||||
[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
|
||||
|
||||
@@ -34,6 +34,11 @@ func main() {
|
||||
flag.StringVar(&config.ModelConfig.Whisper.Task, "whisper-task", "transcribe", "transcribe or translate")
|
||||
flag.IntVar(&config.ModelConfig.Whisper.TailPaddings, "whisper-tail-paddings", -1, "tail paddings for whisper")
|
||||
|
||||
flag.StringVar(&config.ModelConfig.Moonshine.Preprocessor, "moonshine-preprocessor", "", "Path to the moonshine preprocessor model")
|
||||
flag.StringVar(&config.ModelConfig.Moonshine.Encoder, "moonshine-encoder", "", "Path to the moonshine encoder model")
|
||||
flag.StringVar(&config.ModelConfig.Moonshine.UncachedDecoder, "moonshine-uncached-decoder", "", "Path to the moonshine uncached decoder model")
|
||||
flag.StringVar(&config.ModelConfig.Moonshine.CachedDecoder, "moonshine-cached-decoder", "", "Path to the moonshine cached decoder model")
|
||||
|
||||
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
|
||||
|
||||
flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
|
||||
@@ -85,12 +90,8 @@ func main() {
|
||||
log.Println("Emotion: " + result.Emotion)
|
||||
log.Println("Lang: " + result.Lang)
|
||||
log.Println("Event: " + result.Event)
|
||||
for _, v := range result.Timestamps {
|
||||
log.Printf("Timestamp: %+v\n", v)
|
||||
}
|
||||
for _, v := range result.Tokens {
|
||||
log.Println("Token: " + v)
|
||||
}
|
||||
log.Printf("Timestamp: %v\n", result.Timestamps)
|
||||
log.Printf("Tokens: %v\n", result.Tokens)
|
||||
log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
|
||||
}
|
||||
|
||||
|
||||
21
go-api-examples/non-streaming-decode-files/run-moonshine.sh
Executable file
21
go-api-examples/non-streaming-decode-files/run-moonshine.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Print each command before running it (-x) and abort on the first failure (-e).
set -ex
|
||||
|
||||
# Download and unpack the Moonshine tiny English int8 model archive, but only
# when its tokens.txt is not already present; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
|
||||
fi
|
||||
|
||||
# Resolve Go module dependencies and build the example in the current directory.
go mod tidy
|
||||
go build
|
||||
|
||||
# Run the example on the test wave shipped in the model archive, passing the
# four Moonshine model files and the token table.
./non-streaming-decode-files \
|
||||
--moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
|
||||
--moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
|
||||
--moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
|
||||
--moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
|
||||
--tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
|
||||
./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
|
||||
|
||||
1
scripts/go/_internal/non-streaming-decode-files/run-moonshine.sh
Symbolic link
1
scripts/go/_internal/non-streaming-decode-files/run-moonshine.sh
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/non-streaming-decode-files/run-moonshine.sh
|
||||
@@ -382,6 +382,13 @@ type OfflineWhisperModelConfig struct {
|
||||
TailPaddings int
|
||||
}
|
||||
|
||||
// OfflineMoonshineModelConfig holds the paths of the four Moonshine model
// files. NewOfflineRecognizer copies each path into the C model config as a
// C string.
type OfflineMoonshineModelConfig struct {
|
||||
// Path to the moonshine preprocessor model
Preprocessor string
|
||||
// Path to the moonshine encoder model
Encoder string
|
||||
// Path to the moonshine uncached decoder model
UncachedDecoder string
|
||||
// Path to the moonshine cached decoder model
CachedDecoder string
|
||||
}
|
||||
|
||||
type OfflineTdnnModelConfig struct {
|
||||
Model string
|
||||
}
|
||||
@@ -405,6 +412,7 @@ type OfflineModelConfig struct {
|
||||
Whisper OfflineWhisperModelConfig
|
||||
Tdnn OfflineTdnnModelConfig
|
||||
SenseVoice OfflineSenseVoiceModelConfig
|
||||
Moonshine OfflineMoonshineModelConfig
|
||||
Tokens string // Path to tokens.txt
|
||||
|
||||
// Number of threads to use for neural network computation
|
||||
@@ -515,6 +523,18 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
|
||||
|
||||
c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization)
|
||||
|
||||
c.model_config.moonshine.preprocessor = C.CString(config.ModelConfig.Moonshine.Preprocessor)
|
||||
defer C.free(unsafe.Pointer(c.model_config.moonshine.preprocessor))
|
||||
|
||||
c.model_config.moonshine.encoder = C.CString(config.ModelConfig.Moonshine.Encoder)
|
||||
defer C.free(unsafe.Pointer(c.model_config.moonshine.encoder))
|
||||
|
||||
c.model_config.moonshine.uncached_decoder = C.CString(config.ModelConfig.Moonshine.UncachedDecoder)
|
||||
defer C.free(unsafe.Pointer(c.model_config.moonshine.uncached_decoder))
|
||||
|
||||
c.model_config.moonshine.cached_decoder = C.CString(config.ModelConfig.Moonshine.CachedDecoder)
|
||||
defer C.free(unsafe.Pointer(c.model_config.moonshine.cached_decoder))
|
||||
|
||||
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
||||
defer C.free(unsafe.Pointer(c.model_config.tokens))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user