Add C API for speaker embedding extractor. (#711)

This commit is contained in:
Fangjun Kuang
2024-03-28 18:05:40 +08:00
committed by GitHub
parent 638f48f47a
commit 2e0bccad36
23 changed files with 739 additions and 80 deletions

View File

@@ -168,7 +168,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
ans.samples.insert(ans.samples.end(), audio.samples.begin(),
audio.samples.end());
if (callback) {
callback(audio.samples.data(), audio.samples.size(), b * 1.0 / num_batches);
callback(audio.samples.data(), audio.samples.size(),
b * 1.0 / num_batches);
// Caution(fangjun): audio is freed when the callback returns, so users
// should copy the data if they want to access it after the callback
// returns; otherwise they risk a segmentation fault.

View File

@@ -54,8 +54,8 @@ struct GeneratedAudio {
class OfflineTtsImpl;
using GeneratedAudioCallback =
std::function<void(const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
using GeneratedAudioCallback = std::function<void(
const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
class OfflineTts {
public:

View File

@@ -44,7 +44,8 @@ static void Handler(int32_t /*sig*/) {
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
}
static void AudioGeneratedCallback(const float *s, int32_t n) {
static void AudioGeneratedCallback(const float *s, int32_t n,
float /*progress*/) {
if (n > 0) {
std::lock_guard<std::mutex> lock(g_buffer.mutex);
g_buffer.samples.push({s, s + n});

View File

@@ -47,7 +47,8 @@ static void Handler(int32_t /*sig*/) {
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
}
static void AudioGeneratedCallback(const float *s, int32_t n, float /*progress*/) {
static void AudioGeneratedCallback(const float *s, int32_t n,
float /*progress*/) {
if (n > 0) {
Samples samples;
samples.data = std::vector<float>{s, s + n};

View File

@@ -9,9 +9,8 @@
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-writer.h"
void audioCallback(const float *samples, int32_t n, float progress)
{
printf( "sample=%d, progress=%f\n", n, progress );
void audioCallback(const float *samples, int32_t n, float progress) {
printf("sample=%d, progress=%f\n", n, progress);
}
int main(int32_t argc, char *argv[]) {

View File

@@ -93,7 +93,7 @@ class SpeakerEmbeddingManager::Impl {
int32_t num_rows = embedding_matrix_.rows();
if (row_idx < num_rows - 1) {
embedding_matrix_.block(row_idx, 0, num_rows - -1 - row_idx, dim_) =
embedding_matrix_.block(row_idx, 0, num_rows - 1 - row_idx, dim_) =
embedding_matrix_.bottomRows(num_rows - 1 - row_idx);
}