Add C API for offline TTS. (#373)

This commit is contained in:
Fangjun Kuang
2023-10-19 17:38:23 +08:00
committed by GitHub
parent 58ab7e77f2
commit ea7c45b60c
16 changed files with 562 additions and 24 deletions

View File

@@ -12,8 +12,10 @@
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"
struct SherpaOnnxOnlineRecognizer {
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
@@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
}
void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
delete[] r->text;
delete[] r->json;
delete[] r->tokens;
delete[] r->tokens_arr;
delete[] r->timestamps;
delete r;
if (r) {
delete[] r->text;
delete[] r->json;
delete[] r->tokens;
delete[] r->tokens_arr;
delete[] r->timestamps;
delete r;
}
}
void Reset(SherpaOnnxOnlineRecognizer *recognizer,
@@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
void DestroyOfflineRecognizerResult(
const SherpaOnnxOfflineRecognizerResult *r) {
delete[] r->text;
delete[] r->timestamps;
delete r;
if (r) {
delete[] r->text;
delete[] r->timestamps;
delete r;
}
}
// ============================================================
@@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected(
return p->impl->IsSpeechDetected();
}
void SherpaOnnxVoiceActivityDetectorPop(
SherpaOnnxVoiceActivityDetector *p) {
void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) {
p->impl->Pop();
}
void SherpaOnnxVoiceActivityDetectorClear(
SherpaOnnxVoiceActivityDetector *p) {
void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) {
p->impl->Clear();
}
const SherpaOnnxSpeechSegment *
SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
SherpaOnnxVoiceActivityDetector *p) {
const sherpa_onnx::SpeechSegment &segment = p->impl->Front();
SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
@@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
}
void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
delete[] p->samples;
delete p;
if (p) {
delete[] p->samples;
delete p;
}
}
void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
p->impl->Reset();
}
struct SherpaOnnxOfflineTts {
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
const SherpaOnnxOfflineTtsConfig *config) {
sherpa_onnx::OfflineTtsConfig tts_config;
tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
tts_config.model.vits.lexicon =
SHERPA_ONNX_OR(config->model.vits.lexicon, "");
tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
tts_config.model.vits.noise_scale =
SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
tts_config.model.vits.noise_scale_w =
SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
tts_config.model.vits.length_scale =
SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
tts_config.model.debug = config->model.debug;
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
if (tts_config.model.debug) {
fprintf(stderr, "%s\n", tts_config.ToString().c_str());
}
SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
return tts;
}
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid);
if (audio.samples.empty()) {
return nullptr;
}
SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;
float *samples = new float[audio.samples.size()];
std::copy(audio.samples.begin(), audio.samples.end(), samples);
ans->samples = samples;
ans->n = audio.samples.size();
ans->sample_rate = audio.sample_rate;
return ans;
}
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
const SherpaOnnxGeneratedAudio *p) {
if (p) {
delete[] p->samples;
delete p;
}
}
int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p,
const char *filename) {
return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n);
}