This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/c-api-examples/speaker-identification-c-api.c

258 lines
8.0 KiB
C

// c-api-examples/speaker-identification-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation
// We assume you have pre-downloaded the speaker embedding extractor model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// An example command to download
// "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
// is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
//
// clang-format on
//
// Also, please download the test wave files from
//
// https://github.com/csukuangfj/sr-data
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
// Computes the speaker embedding of a single wave file.
//
// The whole file is read, pushed through a fresh stream created from `ex`,
// and the extractor is then asked for the embedding of that stream.
//
// @param ex            Extractor used to create the stream and the embedding.
// @param wav_filename  Path of the wave file to read.
// @return The embedding computed by the extractor. Ownership passes to the
//         caller, who must release it with
//         SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding().
//
// The process is terminated with exit(-1) when the file cannot be read or
// when the extractor is still not ready after all samples have been fed in.
static const float *ComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *ex, const char *wav_filename) {
  const SherpaOnnxWave *audio = SherpaOnnxReadWave(wav_filename);
  if (!audio) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    exit(-1);
  }

  // Feed the complete utterance in one go and mark the end of the input.
  const SherpaOnnxOnlineStream *s =
      SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex);
  SherpaOnnxOnlineStreamAcceptWaveform(s, audio->sample_rate, audio->samples,
                                       audio->num_samples);
  SherpaOnnxOnlineStreamInputFinished(s);

  if (SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex, s) == 0) {
    fprintf(stderr, "The input wave file %s is too short!\n", wav_filename);
    exit(-1);
  }

  // The embedding outlives this function; only the stream and the wave are
  // released here.
  const float *embedding =
      SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex, s);

  SherpaOnnxDestroyOnlineStream(s);
  SherpaOnnxFreeWave(audio);

  // Remember: the caller has to free the returned embedding to avoid a leak.
  return embedding;
}
// Walks through the speaker-identification C API:
//   1. create an embedding extractor and an embedding manager,
//   2. enroll two speakers ("fangjun" and "leijun") from several utterances,
//   3. search for and verify test utterances against the enrolled speakers,
//   4. remove the speakers one by one, repeating a search after each removal,
//   5. release every embedding, the manager and the extractor.
//
// All messages are written to stderr. Returns 0 when every step succeeds;
// returns -1 or calls exit(-1) as soon as a step fails.
int32_t main(void) {
  SherpaOnnxSpeakerEmbeddingExtractorConfig config;
  memset(&config, 0, sizeof(config));

  // please download the model from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  config.model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx";
  config.num_threads = 1;
  config.debug = 0;
  config.provider = "cpu";

  const SherpaOnnxSpeakerEmbeddingExtractor *ex =
      SherpaOnnxCreateSpeakerEmbeddingExtractor(&config);
  if (!ex) {
    fprintf(stderr, "Failed to create speaker embedding extractor\n");
    return -1;
  }

  // The manager is created for embeddings of the extractor's dimension.
  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(ex);
  const SherpaOnnxSpeakerEmbeddingManager *manager =
      SherpaOnnxCreateSpeakerEmbeddingManager(dim);
  if (!manager) {
    fprintf(stderr, "Failed to create speaker embedding manager\n");
    SherpaOnnxDestroySpeakerEmbeddingExtractor(ex);
    return -1;
  }

  // Please download the test data from
  // https://github.com/csukuangfj/sr-data
  const char *spk1_1 = "./sr-data/enroll/fangjun-sr-1.wav";
  const char *spk1_2 = "./sr-data/enroll/fangjun-sr-2.wav";
  const char *spk1_3 = "./sr-data/enroll/fangjun-sr-3.wav";

  const char *spk2_1 = "./sr-data/enroll/leijun-sr-1.wav";
  const char *spk2_2 = "./sr-data/enroll/leijun-sr-2.wav";

  // Each array has one slot more than the number of embeddings, and that
  // slot stays NULL. NOTE(review): presumably AddList() uses the NULL entry
  // as the end-of-list marker - confirm against the C API header.
  const float *spk1_vec[4] = {NULL};
  spk1_vec[0] = ComputeEmbedding(ex, spk1_1);
  spk1_vec[1] = ComputeEmbedding(ex, spk1_2);
  spk1_vec[2] = ComputeEmbedding(ex, spk1_3);

  const float *spk2_vec[3] = {NULL};
  spk2_vec[0] = ComputeEmbedding(ex, spk2_1);
  spk2_vec[1] = ComputeEmbedding(ex, spk2_2);

  // Enroll both speakers and make sure the manager knows about them.
  if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "fangjun", spk1_vec)) {
    fprintf(stderr, "Failed to register fangjun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "fangjun")) {
    fprintf(stderr, "Failed to find fangjun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "leijun", spk2_vec)) {
    fprintf(stderr, "Failed to register leijun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "leijun")) {
    fprintf(stderr, "Failed to find leijun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 2) {
    fprintf(stderr, "There should be two speakers: fangjun and leijun\n");
    exit(-1);
  }

  // Print the registered speakers; the list is walked until a NULL entry.
  const char *const *all_speakers =
      SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
  const char *const *p = all_speakers;
  fprintf(stderr, "list of registered speakers\n-----\n");
  while (p[0]) {
    fprintf(stderr, "speaker: %s\n", p[0]);
    ++p;
  }
  fprintf(stderr, "----\n");

  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);

  const char *test1 = "./sr-data/test/fangjun-test-sr-1.wav";
  const char *test2 = "./sr-data/test/leijun-test-sr-1.wav";
  const char *test3 = "./sr-data/test/liudehua-test-sr-1.wav";

  const float *v1 = ComputeEmbedding(ex, test1);
  const float *v2 = ComputeEmbedding(ex, test2);
  const float *v3 = ComputeEmbedding(ex, test3);

  float threshold = 0.6f;

  // Identification: a non-NULL search result is the matched speaker's name
  // and has to be handed back to FreeSearch() after use.
  const char *name1 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
  if (name1) {
    fprintf(stderr, "%s: Found %s\n", test1, name1);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
  } else {
    fprintf(stderr, "%s: Not found\n", test1);
  }

  const char *name2 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
  if (name2) {
    fprintf(stderr, "%s: Found %s\n", test2, name2);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
  } else {
    fprintf(stderr, "%s: Not found\n", test2);
  }

  const char *name3 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v3, threshold);
  if (name3) {
    fprintf(stderr, "%s: Found %s\n", test3, name3);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name3);
  } else {
    fprintf(stderr, "%s: Not found\n", test3);
  }

  // Verification: check two test embeddings against the name "fangjun".
  int32_t ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v1,
                                                       threshold);
  if (ok) {
    fprintf(stderr, "%s matches fangjun\n", test1);
  } else {
    fprintf(stderr, "%s does NOT match fangjun\n", test1);
  }

  ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v2,
                                               threshold);
  if (ok) {
    fprintf(stderr, "%s matches fangjun\n", test2);
  } else {
    fprintf(stderr, "%s does NOT match fangjun\n", test2);
  }

  // Remove fangjun and repeat the search with fangjun's test embedding.
  fprintf(stderr, "Removing fangjun\n");
  if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "fangjun")) {
    fprintf(stderr, "Failed to remove fangjun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 1) {
    fprintf(stderr, "There should be only 1 speaker left\n");
    exit(-1);
  }

  name1 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
  if (name1) {
    fprintf(stderr, "%s: Found %s\n", test1, name1);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
  } else {
    fprintf(stderr, "%s: Not found\n", test1);
  }

  // Remove leijun as well; the manager must be empty afterwards.
  fprintf(stderr, "Removing leijun\n");
  if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "leijun")) {
    fprintf(stderr, "Failed to remove leijun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 0) {
    fprintf(stderr, "There should be no speakers left\n");
    exit(-1);
  }

  name2 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
  if (name2) {
    fprintf(stderr, "%s: Found %s\n", test2, name2);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
  } else {
    fprintf(stderr, "%s: Not found\n", test2);
  }

  // Print the speaker list once more after both removals.
  all_speakers = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);

  p = all_speakers;
  fprintf(stderr, "list of registered speakers\n-----\n");
  while (p[0]) {
    fprintf(stderr, "speaker: %s\n", p[0]);
    ++p;
  }
  fprintf(stderr, "----\n");

  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);

  // Release every embedding returned by ComputeEmbedding(), then the
  // manager and the extractor.
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v1);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v2);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v3);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[1]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[2]);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[1]);

  SherpaOnnxDestroySpeakerEmbeddingManager(manager);
  SherpaOnnxDestroySpeakerEmbeddingExtractor(ex);

  return 0;
}