Add Golang API for spoken language identification. (#709)

This commit is contained in:
Fangjun Kuang
2024-03-27 19:40:25 +08:00
committed by GitHub
parent 12efbf7397
commit a042f44076
10 changed files with 242 additions and 1 deletions

View File

@@ -0,0 +1,2 @@
vad-spoken-language-identification

View File

@@ -0,0 +1,5 @@
module vad-spoken-language-identification
go 1.12
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../

View File

@@ -0,0 +1 @@
/Users/fangjun/open-source/sherpa-onnx/go-api-examples/vad-spoken-language-identification/main.go

View File

@@ -0,0 +1 @@
/Users/fangjun/open-source/sherpa-onnx/go-api-examples/vad-spoken-language-identification/run.sh

View File

@@ -783,3 +783,72 @@ func (vad *VoiceActivityDetector) Front() *SpeechSegment {
// Reset forwards to SherpaOnnxVoiceActivityDetectorReset, passing the
// detector's underlying C handle.
func (vad *VoiceActivityDetector) Reset() {
	handle := vad.impl
	C.SherpaOnnxVoiceActivityDetectorReset(handle)
}
// Spoken language identification

// SpokenLanguageIdentificationWhisperConfig carries the Whisper-specific
// settings that NewSpokenLanguageIdentification copies into the C
// configuration's whisper section.
type SpokenLanguageIdentificationWhisperConfig struct {
	// Encoder and Decoder are forwarded as whisper.encoder and
	// whisper.decoder (presumably model file paths; confirm against the
	// sherpa-onnx C API).
	Encoder, Decoder string

	// TailPaddings is forwarded as whisper.tail_paddings.
	TailPaddings int
}
// SpokenLanguageIdentificationConfig is the Go-side configuration consumed by
// NewSpokenLanguageIdentification, which copies every field into the
// corresponding C configuration struct.
type SpokenLanguageIdentificationConfig struct {
	// Whisper holds the Whisper model settings.
	Whisper SpokenLanguageIdentificationWhisperConfig

	// NumThreads and Debug are forwarded as num_threads and debug.
	NumThreads, Debug int

	// Provider is forwarded as provider (presumably the name of the
	// execution provider; confirm against the sherpa-onnx C API).
	Provider string
}
// SpokenLanguageIdentification wraps the C-side spoken language identifier.
// Instances are created by NewSpokenLanguageIdentification and released with
// DeleteSpokenLanguageIdentification.
type SpokenLanguageIdentification struct {
	// impl is the handle returned by
	// SherpaOnnxCreateSpokenLanguageIdentification.
	impl *C.struct_SherpaOnnxSpokenLanguageIdentification
}
// SpokenLanguageIdentificationResult is the value returned by
// (*SpokenLanguageIdentification).Compute.
type SpokenLanguageIdentificationResult struct {
	// Lang is a Go copy of the C result's lang string.
	Lang string
}
// NewSpokenLanguageIdentification builds the C configuration from config and
// creates the C-side identifier from it.
//
// The returned wrapper holds whatever handle
// SherpaOnnxCreateSpokenLanguageIdentification returns; release it with
// DeleteSpokenLanguageIdentification when it is no longer needed.
func NewSpokenLanguageIdentification(config *SpokenLanguageIdentificationConfig) *SpokenLanguageIdentification {
	// C.CString allocates C memory for each string; the deferred frees
	// release those copies once the create call has returned.
	encoder := C.CString(config.Whisper.Encoder)
	defer C.free(unsafe.Pointer(encoder))

	decoder := C.CString(config.Whisper.Decoder)
	defer C.free(unsafe.Pointer(decoder))

	provider := C.CString(config.Provider)
	defer C.free(unsafe.Pointer(provider))

	var cfg C.struct_SherpaOnnxSpokenLanguageIdentificationConfig
	cfg.whisper.encoder = encoder
	cfg.whisper.decoder = decoder
	cfg.whisper.tail_paddings = C.int(config.Whisper.TailPaddings)
	cfg.num_threads = C.int(config.NumThreads)
	cfg.debug = C.int(config.Debug)
	cfg.provider = provider

	return &SpokenLanguageIdentification{
		impl: C.SherpaOnnxCreateSpokenLanguageIdentification(&cfg),
	}
}
// DeleteSpokenLanguageIdentification destroys the C-side identifier held by
// slid and clears the stored handle.
func DeleteSpokenLanguageIdentification(slid *SpokenLanguageIdentification) {
	C.SherpaOnnxDestroySpokenLanguageIdentification(slid.impl)

	// Clear the handle so the wrapper does not keep a pointer to the
	// destroyed object.
	slid.impl = nil
}
// CreateStream asks the C-side identifier for a new offline stream and wraps
// the returned handle in an OfflineStream.
//
// The caller owns the returned stream and must pass it to
// DeleteOfflineStream() when done, otherwise its memory is leaked.
func (slid *SpokenLanguageIdentification) CreateStream() *OfflineStream {
	return &OfflineStream{
		impl: C.SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid.impl),
	}
}
// Compute runs SherpaOnnxSpokenLanguageIdentificationCompute on stream and
// returns a Go copy of the language string it reports.
//
// The C result is destroyed before Compute returns, so the caller has nothing
// to free; the returned value is ordinary Go memory.
func (slid *SpokenLanguageIdentification) Compute(stream *OfflineStream) *SpokenLanguageIdentificationResult {
	r := C.SherpaOnnxSpokenLanguageIdentificationCompute(slid.impl, stream.impl)

	// C.GoString copies r.lang into Go-managed memory, so the C result can be
	// released on return. Without this call every Compute leaked one result.
	defer C.SherpaOnnxDestroySpokenLanguageIdentificationResult(r)

	return &SpokenLanguageIdentificationResult{
		Lang: C.GoString(r.lang),
	}
}