Add C API for offline TTS. (#373)
This commit is contained in:
@@ -12,8 +12,10 @@
|
||||
#include "sherpa-onnx/csrc/circular-buffer.h"
|
||||
#include "sherpa-onnx/csrc/display.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
|
||||
struct SherpaOnnxOnlineRecognizer {
|
||||
std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
|
||||
@@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
}
|
||||
|
||||
void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
|
||||
delete[] r->text;
|
||||
delete[] r->json;
|
||||
delete[] r->tokens;
|
||||
delete[] r->tokens_arr;
|
||||
delete[] r->timestamps;
|
||||
delete r;
|
||||
if (r) {
|
||||
delete[] r->text;
|
||||
delete[] r->json;
|
||||
delete[] r->tokens;
|
||||
delete[] r->tokens_arr;
|
||||
delete[] r->timestamps;
|
||||
delete r;
|
||||
}
|
||||
}
|
||||
|
||||
void Reset(SherpaOnnxOnlineRecognizer *recognizer,
|
||||
@@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
|
||||
|
||||
void DestroyOfflineRecognizerResult(
|
||||
const SherpaOnnxOfflineRecognizerResult *r) {
|
||||
delete[] r->text;
|
||||
delete[] r->timestamps;
|
||||
delete r;
|
||||
if (r) {
|
||||
delete[] r->text;
|
||||
delete[] r->timestamps;
|
||||
delete r;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
@@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected(
|
||||
return p->impl->IsSpeechDetected();
|
||||
}
|
||||
|
||||
void SherpaOnnxVoiceActivityDetectorPop(
|
||||
SherpaOnnxVoiceActivityDetector *p) {
|
||||
void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) {
|
||||
p->impl->Pop();
|
||||
}
|
||||
|
||||
void SherpaOnnxVoiceActivityDetectorClear(
|
||||
SherpaOnnxVoiceActivityDetector *p) {
|
||||
void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) {
|
||||
p->impl->Clear();
|
||||
}
|
||||
|
||||
const SherpaOnnxSpeechSegment *
|
||||
SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
|
||||
const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
|
||||
SherpaOnnxVoiceActivityDetector *p) {
|
||||
const sherpa_onnx::SpeechSegment &segment = p->impl->Front();
|
||||
|
||||
SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
|
||||
@@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
|
||||
}
|
||||
|
||||
void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
|
||||
delete[] p->samples;
|
||||
delete p;
|
||||
if (p) {
|
||||
delete[] p->samples;
|
||||
delete p;
|
||||
}
|
||||
}
|
||||
|
||||
void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
|
||||
p->impl->Reset();
|
||||
}
|
||||
|
||||
struct SherpaOnnxOfflineTts {
|
||||
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
|
||||
};
|
||||
|
||||
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
||||
const SherpaOnnxOfflineTtsConfig *config) {
|
||||
sherpa_onnx::OfflineTtsConfig tts_config;
|
||||
|
||||
tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
|
||||
tts_config.model.vits.lexicon =
|
||||
SHERPA_ONNX_OR(config->model.vits.lexicon, "");
|
||||
tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
|
||||
tts_config.model.vits.noise_scale =
|
||||
SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
|
||||
tts_config.model.vits.noise_scale_w =
|
||||
SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
|
||||
tts_config.model.vits.length_scale =
|
||||
SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);
|
||||
|
||||
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||
tts_config.model.debug = config->model.debug;
|
||||
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||
|
||||
if (tts_config.model.debug) {
|
||||
fprintf(stderr, "%s\n", tts_config.ToString().c_str());
|
||||
}
|
||||
|
||||
SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
|
||||
|
||||
tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
|
||||
|
||||
return tts;
|
||||
}
|
||||
|
||||
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
|
||||
|
||||
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
|
||||
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid);
|
||||
|
||||
if (audio.samples.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;
|
||||
|
||||
float *samples = new float[audio.samples.size()];
|
||||
std::copy(audio.samples.begin(), audio.samples.end(), samples);
|
||||
|
||||
ans->samples = samples;
|
||||
ans->n = audio.samples.size();
|
||||
ans->sample_rate = audio.sample_rate;
|
||||
|
||||
return ans;
|
||||
}
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
|
||||
const SherpaOnnxGeneratedAudio *p) {
|
||||
if (p) {
|
||||
delete[] p->samples;
|
||||
delete p;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p,
|
||||
const char *filename) {
|
||||
return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n);
|
||||
}
|
||||
|
||||
@@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
|
||||
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
|
||||
SherpaOnnxVoiceActivityDetector *p);
|
||||
|
||||
// ============================================================
|
||||
// For offline Text-to-Speech (i.e., non-streaming TTS)
|
||||
// ============================================================
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
|
||||
const char *model;
|
||||
const char *lexicon;
|
||||
const char *tokens;
|
||||
|
||||
float noise_scale;
|
||||
float noise_scale_w;
|
||||
float length_scale; // < 1, faster in speed; > 1, slower in speed
|
||||
} SherpaOnnxOfflineTtsVitsModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
|
||||
SherpaOnnxOfflineTtsVitsModelConfig vits;
|
||||
int32_t num_threads;
|
||||
int32_t debug;
|
||||
const char *provider;
|
||||
} SherpaOnnxOfflineTtsModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
|
||||
SherpaOnnxOfflineTtsModelConfig model;
|
||||
} SherpaOnnxOfflineTtsConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
|
||||
const float *samples; // in the range [-1, 1]
|
||||
int32_t n; // number of samples
|
||||
int32_t sample_rate;
|
||||
} SherpaOnnxGeneratedAudio;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
|
||||
|
||||
// Create an instance of offline TTS. The user has to use DestroyOfflineTts()
|
||||
// to free the returned pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
||||
const SherpaOnnxOfflineTtsConfig *config);
|
||||
|
||||
// Free the pointer returned by CreateOfflineTts()
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
|
||||
|
||||
// Generate audio from the given text and speaker id (sid).
|
||||
// The user has to use DestroyOfflineTtsGeneratedAudio() to free the returned
|
||||
// pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
|
||||
const SherpaOnnxGeneratedAudio *p);
|
||||
|
||||
// Write the generated audio to a wave file.
|
||||
// The saved wave file contains a single channel and has 16-bit samples.
|
||||
//
|
||||
// Return 1 if the write succeeded; return 0 on failure.
|
||||
SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave(
|
||||
const SherpaOnnxGeneratedAudio *p, const char *filename);
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
@@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt
|
||||
|
||||
Please see
|
||||
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
|
||||
or detailes.
|
||||
or details.
|
||||
)usage";
|
||||
|
||||
sherpa_onnx::ParseOptions po(kUsageMessage);
|
||||
|
||||
Reference in New Issue
Block a user