Add Go API for speech enhancement GTCRN models (#1991)
This commit is contained in:
1
scripts/go/_internal/speech-enhancement-gtcrn/.gitignore
vendored
Normal file
1
scripts/go/_internal/speech-enhancement-gtcrn/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
speech-enhancement-gtcrn
|
||||
5
scripts/go/_internal/speech-enhancement-gtcrn/go.mod
Normal file
5
scripts/go/_internal/speech-enhancement-gtcrn/go.mod
Normal file
@@ -0,0 +1,5 @@
|
||||
module speech-enhancement-gtcrn
|
||||
|
||||
go 1.17
|
||||
|
||||
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
|
||||
1
scripts/go/_internal/speech-enhancement-gtcrn/main.go
Symbolic link
1
scripts/go/_internal/speech-enhancement-gtcrn/main.go
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/speech-enhancement-gtcrn/main.go
|
||||
1
scripts/go/_internal/speech-enhancement-gtcrn/run.sh
Symbolic link
1
scripts/go/_internal/speech-enhancement-gtcrn/run.sh
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/speech-enhancement-gtcrn/run.sh
|
||||
@@ -959,7 +959,6 @@ func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedA
|
||||
// see https://stackoverflow.com/questions/48756732/what-does-1-30c-yourtype-do-exactly-in-cgo
|
||||
// :n:n means 0:n:n, means low:high:capacity
|
||||
samples := unsafe.Slice(audio.samples, n)
|
||||
// copy(ans.Samples, samples)
|
||||
for i := 0; i < n; i++ {
|
||||
ans.Samples[i] = float32(samples[i])
|
||||
}
|
||||
@@ -1840,3 +1839,88 @@ func (tagging *AudioTagging) Compute(s *OfflineStream, topK int32) []AudioEvent
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// OfflineSpeechDenoiserGtcrnModelConfig holds the settings for a GTCRN
// speech enhancement model.
type OfflineSpeechDenoiserGtcrnModelConfig struct {
	// Model is passed to the C API as model.gtcrn.model.
	// NOTE(review): presumably the path to the GTCRN model file; confirm
	// against the C API.
	Model string
}
|
||||
|
||||
type OfflineSpeechDenoiserModelConfig struct {
|
||||
Gtcrn OfflineSpeechDenoiserGtcrnModelConfig
|
||||
NumThreads int32
|
||||
Debug int32
|
||||
Provider string
|
||||
}
|
||||
|
||||
type OfflineSpeechDenoiserConfig struct {
|
||||
Model OfflineSpeechDenoiserModelConfig
|
||||
}
|
||||
|
||||
type OfflineSpeechDenoiser struct {
|
||||
impl *C.struct_SherpaOnnxOfflineSpeechDenoiser
|
||||
}
|
||||
|
||||
// DenoisedAudio is the result of running a speech denoiser. The samples are
// held in Go memory (copied out of the C result by Run).
type DenoisedAudio struct {
	// Samples are normalized to the range [-1, 1].
	Samples []float32

	// SampleRate is the sample rate reported by the C API for Samples.
	SampleRate int
}
|
||||
|
||||
// Free the internal pointer inside the OfflineSpeechDenoiser to avoid memory leak.
|
||||
func DeleteOfflineSpeechDenoiser(sd *OfflineSpeechDenoiser) {
|
||||
C.SherpaOnnxDestroyOfflineSpeechDenoiser(sd.impl)
|
||||
sd.impl = nil
|
||||
}
|
||||
|
||||
// The user is responsible to invoke [DeleteOfflineSpeechDenoiser]() to free
|
||||
// the returned tts to avoid memory leak
|
||||
func NewOfflineSpeechDenoiser(config *OfflineSpeechDenoiserConfig) *OfflineSpeechDenoiser {
|
||||
c := C.struct_SherpaOnnxOfflineSpeechDenoiserConfig{}
|
||||
c.model.gtcrn.model = C.CString(config.Model.Gtcrn.Model)
|
||||
defer C.free(unsafe.Pointer(c.model.gtcrn.model))
|
||||
|
||||
c.model.num_threads = C.int(config.Model.NumThreads)
|
||||
c.model.debug = C.int(config.Model.Debug)
|
||||
|
||||
c.model.provider = C.CString(config.Model.Provider)
|
||||
defer C.free(unsafe.Pointer(c.model.provider))
|
||||
|
||||
impl := C.SherpaOnnxCreateOfflineSpeechDenoiser(&c)
|
||||
if impl == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
sd := &OfflineSpeechDenoiser{}
|
||||
sd.impl = impl
|
||||
return sd
|
||||
}
|
||||
|
||||
func (sd *OfflineSpeechDenoiser) Run(samples []float32, sampleRate int) *DenoisedAudio {
|
||||
audio := C.SherpaOnnxOfflineSpeechDenoiserRun(sd.impl, (*C.float)(&samples[0]), C.int(len(samples)), C.int(sampleRate))
|
||||
defer C.SherpaOnnxDestroyDenoisedAudio(audio)
|
||||
|
||||
ans := &DenoisedAudio{}
|
||||
ans.SampleRate = int(audio.sample_rate)
|
||||
n := int(audio.n)
|
||||
ans.Samples = make([]float32, n)
|
||||
|
||||
denoisedSamples := unsafe.Slice(audio.samples, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ans.Samples[i] = float32(denoisedSamples[i])
|
||||
}
|
||||
|
||||
return ans
|
||||
}
|
||||
|
||||
func (audio *DenoisedAudio) Save(filename string) bool {
|
||||
s := C.CString(filename)
|
||||
defer C.free(unsafe.Pointer(s))
|
||||
|
||||
ok := int(C.SherpaOnnxWriteWave((*C.float)(&audio.Samples[0]), C.int(len(audio.Samples)), C.int(audio.SampleRate), s))
|
||||
|
||||
return ok == 1
|
||||
}
|
||||
|
||||
func (sd *OfflineSpeechDenoiser) SampleRate() int {
|
||||
return int(C.SherpaOnnxOfflineSpeechDenoiserGetSampleRate(sd.impl))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user