Add Android demo for speaker recognition (#536)
See pre-built Android APKs at https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
This commit is contained in:
2
sherpa-onnx/csrc/.gitignore
vendored
Normal file
2
sherpa-onnx/csrc/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.cc-bak
|
||||
*.h-bak
|
||||
@@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorGeneralImpl
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: model_(config) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Overload taking an AAssetManager; both arguments are forwarded unchanged to
// the constructor of the model_ member.
SpeakerEmbeddingExtractorGeneralImpl(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: model_(mgr, config) {}
|
||||
#endif
|
||||
|
||||
int32_t Dim() const override { return model_.GetMetaData().output_dim; }
|
||||
|
||||
std::unique_ptr<OnlineStream> CreateStream() const override {
|
||||
|
||||
@@ -90,4 +90,35 @@ SpeakerEmbeddingExtractorImpl::Create(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
std::unique_ptr<SpeakerEmbeddingExtractorImpl>
|
||||
SpeakerEmbeddingExtractorImpl::Create(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) {
|
||||
ModelType model_type = ModelType::kUnkown;
|
||||
|
||||
{
|
||||
auto buffer = ReadFile(mgr, config.model);
|
||||
|
||||
model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
|
||||
}
|
||||
|
||||
switch (model_type) {
|
||||
case ModelType::kWeSpeaker:
|
||||
// fall through
|
||||
case ModelType::k3dSpeaker:
|
||||
return std::make_unique<SpeakerEmbeddingExtractorGeneralImpl>(mgr,
|
||||
config);
|
||||
case ModelType::kNeMo:
|
||||
return std::make_unique<SpeakerEmbeddingExtractorNeMoImpl>(mgr, config);
|
||||
case ModelType::kUnkown:
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Unknown model type in for speaker embedding extractor!");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// unreachable code
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -9,6 +9,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
@@ -20,6 +25,11 @@ class SpeakerEmbeddingExtractorImpl {
|
||||
static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
|
||||
#endif
|
||||
|
||||
virtual int32_t Dim() const = 0;
|
||||
|
||||
virtual std::unique_ptr<OnlineStream> CreateStream() const = 0;
|
||||
|
||||
@@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorModel::Impl {
|
||||
}
|
||||
}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Builds the model implementation from a model file that is read through the
// given asset manager: the bytes of `config.model` are loaded with ReadFile()
// and handed to Init().
Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : config_(config),
      env_(ORT_LOGGING_LEVEL_ERROR),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  auto model_bytes = ReadFile(mgr, config.model);
  Init(model_bytes.data(), model_bytes.size());
}
|
||||
#endif
|
||||
|
||||
Ort::Value Compute(Ort::Value x) const {
|
||||
std::array<Ort::Value, 1> inputs = {std::move(x)};
|
||||
|
||||
@@ -98,6 +111,12 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(std::make_unique<Impl>(config)) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Asset-manager overload: constructs the private Impl from (mgr, config).
SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(std::make_unique<Impl>(mgr, config)) {}
|
||||
#endif
|
||||
|
||||
SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default;
|
||||
|
||||
const SpeakerEmbeddingExtractorModelMetaData &
|
||||
|
||||
@@ -6,6 +6,11 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "onnxruntime_cxx_api.h" // NOLINT
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h"
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
@@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorModel {
|
||||
explicit SpeakerEmbeddingExtractorModel(
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
SpeakerEmbeddingExtractorModel(AAssetManager *mgr,
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
#endif
|
||||
|
||||
~SpeakerEmbeddingExtractorModel();
|
||||
|
||||
const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const;
|
||||
|
||||
@@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: model_(config) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Overload taking an AAssetManager; both arguments are forwarded unchanged to
// the constructor of the model_ member.
SpeakerEmbeddingExtractorNeMoImpl(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: model_(mgr, config) {}
|
||||
#endif
|
||||
|
||||
int32_t Dim() const override { return model_.GetMetaData().output_dim; }
|
||||
|
||||
std::unique_ptr<OnlineStream> CreateStream() const override {
|
||||
|
||||
@@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
|
||||
}
|
||||
}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Builds the NeMo model implementation from a model file that is read through
// the given asset manager: the bytes of `config.model` are loaded with
// ReadFile() and handed to Init().
Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : config_(config),
      env_(ORT_LOGGING_LEVEL_ERROR),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  auto model_bytes = ReadFile(mgr, config.model);
  Init(model_bytes.data(), model_bytes.size());
}
|
||||
#endif
|
||||
|
||||
Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const {
|
||||
std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};
|
||||
|
||||
@@ -106,6 +119,12 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(std::make_unique<Impl>(config)) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Asset-manager overload: constructs the private Impl from (mgr, config).
SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(std::make_unique<Impl>(mgr, config)) {}
|
||||
#endif
|
||||
|
||||
SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() =
|
||||
default;
|
||||
|
||||
|
||||
@@ -6,6 +6,11 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "onnxruntime_cxx_api.h" // NOLINT
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h"
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
@@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorNeMoModel {
|
||||
explicit SpeakerEmbeddingExtractorNeMoModel(
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
SpeakerEmbeddingExtractorNeMoModel(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
|
||||
#endif
|
||||
|
||||
~SpeakerEmbeddingExtractorNeMoModel();
|
||||
|
||||
const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const;
|
||||
|
||||
@@ -55,6 +55,12 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
// Asset-manager overload: impl_ is whatever
// SpeakerEmbeddingExtractorImpl::Create(mgr, config) returns.
// NOTE(review): Create() returns nullptr for an unknown model type, and
// Dim() dereferences impl_ without a check -- confirm callers validate the
// model before using the extractor.
SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {}
|
||||
#endif
|
||||
|
||||
SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default;
|
||||
|
||||
int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); }
|
||||
|
||||
@@ -9,6 +9,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/online-stream.h"
|
||||
#include "sherpa-onnx/csrc/parse-options.h"
|
||||
|
||||
@@ -40,6 +45,11 @@ class SpeakerEmbeddingExtractor {
|
||||
explicit SpeakerEmbeddingExtractor(
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
SpeakerEmbeddingExtractor(AAssetManager *mgr,
|
||||
const SpeakerEmbeddingExtractorConfig &config);
|
||||
#endif
|
||||
|
||||
~SpeakerEmbeddingExtractor();
|
||||
|
||||
// Return the dimension of the embedding
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
@@ -36,6 +37,52 @@ class SpeakerEmbeddingManager::Impl {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Add(const std::string &name,
|
||||
const std::vector<std::vector<float>> &embedding_list) {
|
||||
if (name2row_.count(name)) {
|
||||
// a speaker with the same name already exists
|
||||
return false;
|
||||
}
|
||||
|
||||
if (embedding_list.empty()) {
|
||||
SHERPA_ONNX_LOGE("Empty list of embeddings");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto &x : embedding_list) {
|
||||
if (x.size() != dim_) {
|
||||
SHERPA_ONNX_LOGE("Given dim: %d, expected dim: %d",
|
||||
static_cast<int32_t>(x.size()), dim_);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// compute the average
|
||||
Eigen::RowVectorXf v = Eigen::Map<Eigen::RowVectorXf>(
|
||||
const_cast<float *>(embedding_list[0].data()), dim_);
|
||||
int32_t i = -1;
|
||||
for (const auto &x : embedding_list) {
|
||||
++i;
|
||||
if (i == 0) {
|
||||
continue;
|
||||
}
|
||||
v += Eigen::Map<Eigen::RowVectorXf>(const_cast<float *>(x.data()), dim_);
|
||||
}
|
||||
|
||||
// no need to compute the mean since we are going to normalize it anyway
|
||||
// v /= embedding_list.size();
|
||||
|
||||
v.normalize();
|
||||
|
||||
embedding_matrix_.conservativeResize(embedding_matrix_.rows() + 1, dim_);
|
||||
embedding_matrix_.bottomRows(1) = v;
|
||||
|
||||
name2row_[name] = embedding_matrix_.rows() - 1;
|
||||
row2name_[embedding_matrix_.rows() - 1] = name;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Remove(const std::string &name) {
|
||||
if (!name2row_.count(name)) {
|
||||
return false;
|
||||
@@ -104,8 +151,24 @@ class SpeakerEmbeddingManager::Impl {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Contains(const std::string &name) const {
|
||||
return name2row_.count(name) > 0;
|
||||
}
|
||||
|
||||
// Returns the number of rows of embedding_matrix_; Add() appends one row per
// registered speaker.
int32_t NumSpeakers() const { return embedding_matrix_.rows(); }
|
||||
|
||||
// Returns dim_, the number of elements every embedding passed to Add() must
// have.
int32_t Dim() const { return dim_; }
|
||||
|
||||
std::vector<std::string> GetAllSpeakers() const {
|
||||
std::vector<std::string> all_speakers;
|
||||
for (const auto &p : name2row_) {
|
||||
all_speakers.push_back(p.first);
|
||||
}
|
||||
|
||||
std::stable_sort(all_speakers.begin(), all_speakers.end());
|
||||
return all_speakers;
|
||||
}
|
||||
|
||||
private:
|
||||
int32_t dim_;
|
||||
FloatMatrix embedding_matrix_;
|
||||
@@ -123,6 +186,12 @@ bool SpeakerEmbeddingManager::Add(const std::string &name,
|
||||
return impl_->Add(name, p);
|
||||
}
|
||||
|
||||
// Adds a speaker from a list of embeddings; forwards to Impl::Add() and
// returns its result.
bool SpeakerEmbeddingManager::Add(
|
||||
const std::string &name,
|
||||
const std::vector<std::vector<float>> &embedding_list) const {
|
||||
return impl_->Add(name, embedding_list);
|
||||
}
|
||||
|
||||
bool SpeakerEmbeddingManager::Remove(const std::string &name) const {
|
||||
return impl_->Remove(name);
|
||||
}
|
||||
@@ -141,4 +210,14 @@ int32_t SpeakerEmbeddingManager::NumSpeakers() const {
|
||||
return impl_->NumSpeakers();
|
||||
}
|
||||
|
||||
// Returns the embedding dimension reported by the implementation object.
int32_t SpeakerEmbeddingManager::Dim() const { return impl_->Dim(); }
|
||||
|
||||
// Returns true if a speaker called `name` is registered; forwards to
// Impl::Contains().
bool SpeakerEmbeddingManager::Contains(const std::string &name) const {
|
||||
return impl_->Contains(name);
|
||||
}
|
||||
|
||||
// Returns the names of all registered speakers; forwards to
// Impl::GetAllSpeakers(), which returns them sorted.
std::vector<std::string> SpeakerEmbeddingManager::GetAllSpeakers() const {
|
||||
return impl_->GetAllSpeakers();
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
@@ -26,6 +27,19 @@ class SpeakerEmbeddingManager {
|
||||
*/
|
||||
bool Add(const std::string &name, const float *p) const;
|
||||
|
||||
/** Add a list of embeddings of a speaker.
|
||||
*
|
||||
* @param name Name of the speaker
|
||||
* @param embedding_list A non-empty list of embeddings. Each entry should be
|
||||
* of size `dim`. The normalized sum (equivalently, the
|
||||
* normalized average) of the list is the final embedding.
|
||||
* @return Return true if added successfully. Return false if it failed:
|
||||
* there is already a speaker with the same `name`, the list is
|
||||
* empty, or an entry does not have exactly `dim` elements.
|
||||
*/
|
||||
bool Add(const std::string &name,
|
||||
const std::vector<std::vector<float>> &embedding_list) const;
|
||||
|
||||
/* Remove a speaker by its name.
|
||||
*
|
||||
* @param name Name of the speaker to remove.
|
||||
@@ -60,8 +74,16 @@ class SpeakerEmbeddingManager {
|
||||
*/
|
||||
bool Verify(const std::string &name, const float *p, float threshold) const;
|
||||
|
||||
// Return true if the given speaker already exists; return false otherwise.
|
||||
bool Contains(const std::string &name) const;
|
||||
|
||||
// Return the number of speakers currently registered.
int32_t NumSpeakers() const;
|
||||
|
||||
// Return the number of elements each embedding is expected to have.
int32_t Dim() const;
|
||||
|
||||
// Return a list of all speaker names, sorted in ascending order.
|
||||
std::vector<std::string> GetAllSpeakers() const;
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
std::unique_ptr<Impl> impl_;
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
|
||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||
#include "sherpa-onnx/csrc/wave-reader.h"
|
||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
@@ -208,6 +210,85 @@ class SherpaOnnxKws {
|
||||
int32_t input_sample_rate_ = -1;
|
||||
};
|
||||
|
||||
class SherpaOnnxSpeakerEmbeddingExtractorStream {
|
||||
public:
|
||||
explicit SherpaOnnxSpeakerEmbeddingExtractorStream(
|
||||
std::unique_ptr<OnlineStream> stream)
|
||||
: stream_(std::move(stream)) {}
|
||||
|
||||
void AcceptWaveform(int32_t sample_rate, const float *samples,
|
||||
int32_t n) const {
|
||||
stream_->AcceptWaveform(sample_rate, samples, n);
|
||||
}
|
||||
|
||||
void InputFinished() const { stream_->InputFinished(); }
|
||||
|
||||
OnlineStream *Get() const { return stream_.get(); }
|
||||
|
||||
private:
|
||||
std::unique_ptr<OnlineStream> stream_;
|
||||
};
|
||||
|
||||
class SherpaOnnxSpeakerEmbeddingExtractor {
|
||||
public:
|
||||
#if __ANDROID_API__ >= 9
|
||||
SherpaOnnxSpeakerEmbeddingExtractor(
|
||||
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
|
||||
: extractor_(mgr, config) {}
|
||||
#endif
|
||||
|
||||
explicit SherpaOnnxSpeakerEmbeddingExtractor(
|
||||
const SpeakerEmbeddingExtractorConfig &config)
|
||||
: extractor_(config) {}
|
||||
|
||||
int32_t Dim() const { return extractor_.Dim(); }
|
||||
|
||||
bool IsReady(const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const {
|
||||
return extractor_.IsReady(stream->Get());
|
||||
}
|
||||
|
||||
SherpaOnnxSpeakerEmbeddingExtractorStream *CreateStream() const {
|
||||
return new SherpaOnnxSpeakerEmbeddingExtractorStream(
|
||||
extractor_.CreateStream());
|
||||
}
|
||||
|
||||
std::vector<float> Compute(
|
||||
const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const {
|
||||
return extractor_.Compute(stream->Get());
|
||||
}
|
||||
|
||||
private:
|
||||
SpeakerEmbeddingExtractor extractor_;
|
||||
};
|
||||
|
||||
// Converts a Java config object into a SpeakerEmbeddingExtractorConfig by
// reading its fields `model` (String), `numThreads` (int), `debug` (boolean)
// and `provider` (String).
//
// A String field that is null on the Java side leaves the corresponding C++
// member at its default value instead of handing a null jstring to
// GetStringUTFChars().
//
// @param env    JNI environment of the calling thread.
// @param config The Java config object to read from.
// @return The populated C++ configuration.
static SpeakerEmbeddingExtractorConfig GetSpeakerEmbeddingExtractorConfig(
    JNIEnv *env, jobject config) {
  SpeakerEmbeddingExtractorConfig ans;

  jclass cls = env->GetObjectClass(config);

  // Copies the Java String field `field_name` of `config` into `*out`.
  auto read_string_field = [env, config, cls](const char *field_name,
                                              std::string *out) {
    jfieldID string_fid =
        env->GetFieldID(cls, field_name, "Ljava/lang/String;");
    auto s = static_cast<jstring>(env->GetObjectField(config, string_fid));
    if (s == nullptr) {
      return;
    }

    const char *p = env->GetStringUTFChars(s, nullptr);
    if (p == nullptr) {
      return;
    }

    *out = p;
    env->ReleaseStringUTFChars(s, p);
  };

  read_string_field("model", &ans.model);

  jfieldID fid = env->GetFieldID(cls, "numThreads", "I");
  ans.num_threads = env->GetIntField(config, fid);

  fid = env->GetFieldID(cls, "debug", "Z");
  ans.debug = env->GetBooleanField(config, fid);

  read_string_field("provider", &ans.provider);

  return ans;
}
|
||||
|
||||
static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
|
||||
OnlineRecognizerConfig ans;
|
||||
|
||||
@@ -771,6 +852,334 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
// JNI: creates a native extractor from the Java config object and returns
// its address as a jlong handle. When __ANDROID_API__ >= 9, the
// AAssetManager obtained from `asset_manager` is passed to the extractor.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_new(JNIEnv *env,
                                                         jobject /*obj*/,
                                                         jobject asset_manager,
                                                         jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (mgr == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
  }
#endif

  auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config);
  SHERPA_ONNX_LOGE("new config:\n%s", config.ToString().c_str());

#if __ANDROID_API__ >= 9
  auto *extractor =
      new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(mgr, config);
#else
  auto *extractor =
      new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(config);
#endif

  return reinterpret_cast<jlong>(extractor);
}
|
||||
|
||||
// JNI: creates a native extractor from the Java config object without an
// asset manager and returns its address as a jlong handle. A failed
// Validate() is only logged; construction still proceeds.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config);
  SHERPA_ONNX_LOGE("newFromFile config:\n%s", config.ToString().c_str());

  const bool is_valid = config.Validate();
  if (!is_valid) {
    SHERPA_ONNX_LOGE("Errors found in config!");
  }

  auto *extractor =
      new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(config);
  return reinterpret_cast<jlong>(extractor);
}
|
||||
|
||||
// JNI: destroys the native extractor identified by the handle `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_delete(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  auto *extractor =
      reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr);
  delete extractor;
}
|
||||
|
||||
// JNI: creates a new stream for the extractor behind `ptr` and returns the
// address of the stream wrapper as a jlong handle.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_createStream(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  auto *extractor =
      reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr);

  auto *stream = extractor->CreateStream();
  return reinterpret_cast<jlong>(stream);
}
|
||||
|
||||
// JNI: returns whether the extractor behind `ptr` reports the stream behind
// `stream_ptr` as ready.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_isReady(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jlong stream_ptr) {
  using Extractor = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor;
  using Stream = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream;

  auto *extractor = reinterpret_cast<Extractor *>(ptr);
  auto *stream = reinterpret_cast<Stream *>(stream_ptr);

  return extractor->IsReady(stream);
}
|
||||
|
||||
// JNI: computes the embedding for the stream behind `stream_ptr` using the
// extractor behind `ptr` and returns it as a new Java float array.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jfloatArray JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_compute(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jlong stream_ptr) {
  using Extractor = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor;
  using Stream = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream;

  auto *extractor = reinterpret_cast<Extractor *>(ptr);
  auto *stream = reinterpret_cast<Stream *>(stream_ptr);

  std::vector<float> values = extractor->Compute(stream);

  // Copy the native values into a freshly allocated Java array.
  jfloatArray result = env->NewFloatArray(values.size());
  env->SetFloatArrayRegion(result, 0, values.size(), values.data());
  return result;
}
|
||||
|
||||
// JNI: returns the embedding dimension of the extractor behind `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_dim(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  using Extractor = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor;
  return reinterpret_cast<Extractor *>(ptr)->Dim();
}
|
||||
|
||||
// JNI: destroys the native stream wrapper identified by the handle `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_delete(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  using Stream = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream;

  auto *stream = reinterpret_cast<Stream *>(ptr);
  delete stream;
}
|
||||
|
||||
// JNI: feeds the Java float array `samples`, with the given sample rate, to
// the stream behind `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_acceptWaveform(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jint sample_rate) {
  using Stream = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream;
  auto *stream = reinterpret_cast<Stream *>(ptr);

  jfloat *data = env->GetFloatArrayElements(samples, nullptr);
  jsize num_samples = env->GetArrayLength(samples);

  stream->AcceptWaveform(sample_rate, data, num_samples);

  // The samples were only read, so nothing needs to be copied back.
  env->ReleaseFloatArrayElements(samples, data, JNI_ABORT);
}
|
||||
|
||||
// JNI: signals to the stream behind `ptr` that no more samples will be fed.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_inputFinished(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  using Stream = sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream;
  reinterpret_cast<Stream *>(ptr)->InputFinished();
}
|
||||
|
||||
// JNI: creates a native SpeakerEmbeddingManager for embeddings of size `dim`
// and returns its address as a jlong handle.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_new(
    JNIEnv *env, jobject /*obj*/, jint dim) {
  auto *manager = new sherpa_onnx::SpeakerEmbeddingManager(dim);
  return reinterpret_cast<jlong>(manager);
}
|
||||
|
||||
// JNI: destroys the native manager identified by the handle `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_delete(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
}
|
||||
|
||||
// JNI: registers `embedding` under `name` in the manager behind `ptr`.
//
// @param ptr       Handle of the native SpeakerEmbeddingManager.
// @param name      Name of the speaker.
// @param embedding Embedding of the speaker; must have manager->Dim() elements.
// @return The result of SpeakerEmbeddingManager::Add(), or false (after
//         logging) if `embedding` has the wrong number of elements.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_add(JNIEnv *env,
                                                       jobject /*obj*/,
                                                       jlong ptr, jstring name,
                                                       jfloatArray embedding) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  // Check the length before acquiring the elements so that the failure path
  // has nothing to release.
  jsize n = env->GetArrayLength(embedding);
  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    // Report the failure to the caller instead of terminating the process.
    return false;
  }

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Add(p_name, p);

  env->ReleaseStringUTFChars(name, p_name);
  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

  return ok;
}
|
||||
|
||||
// JNI: registers a speaker from several embeddings in the manager behind
// `ptr`.
//
// @param ptr           Handle of the native SpeakerEmbeddingManager.
// @param name          Name of the speaker.
// @param embedding_arr Array of float arrays; each must have manager->Dim()
//                      elements.
// @return The result of SpeakerEmbeddingManager::Add(), or false if the array
//         is empty, contains a null entry, or contains an entry with the
//         wrong number of elements.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_addList(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
    jobjectArray embedding_arr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  int num_embeddings = env->GetArrayLength(embedding_arr);
  if (num_embeddings == 0) {
    return false;
  }

  std::vector<std::vector<float>> embedding_list;
  embedding_list.reserve(num_embeddings);
  for (int32_t i = 0; i != num_embeddings; ++i) {
    auto embedding = static_cast<jfloatArray>(
        env->GetObjectArrayElement(embedding_arr, i));
    if (embedding == nullptr) {
      SHERPA_ONNX_LOGE("i: %d. Embedding is null", i);
      return false;
    }

    jsize n = env->GetArrayLength(embedding);
    if (n != manager->Dim()) {
      SHERPA_ONNX_LOGE("i: %d. Expected dim %d, given %d", i, manager->Dim(),
                       static_cast<int32_t>(n));
      env->DeleteLocalRef(embedding);
      // Report the failure to the caller instead of terminating the process.
      return false;
    }

    jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
    embedding_list.emplace_back(p, p + n);
    env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

    // Every GetObjectArrayElement() call creates a new local reference.
    // Release it right away so that a long list cannot exhaust the local
    // reference table.
    env->DeleteLocalRef(embedding);
  }

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Add(p_name, embedding_list);

  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}
|
||||
|
||||
// JNI: removes the speaker called `name` from the manager behind `ptr` and
// returns the result of SpeakerEmbeddingManager::Remove().
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_remove(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jlong ptr,
                                                          jstring name) {
  auto *manager =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  const char *utf_name = env->GetStringUTFChars(name, nullptr);
  jboolean removed = manager->Remove(utf_name);
  env->ReleaseStringUTFChars(name, utf_name);

  return removed;
}
|
||||
|
||||
// JNI: searches the manager behind `ptr` for a speaker matching `embedding`.
//
// @param ptr       Handle of the native SpeakerEmbeddingManager.
// @param embedding Query embedding; must have manager->Dim() elements.
// @param threshold Forwarded unchanged to SpeakerEmbeddingManager::Search().
// @return The string returned by SpeakerEmbeddingManager::Search(), or an
//         empty string (after logging) if `embedding` has the wrong number of
//         elements.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_search(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jlong ptr,
                                                          jfloatArray embedding,
                                                          jfloat threshold) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  // Check the length before acquiring the elements so that the failure path
  // has nothing to release.
  jsize n = env->GetArrayLength(embedding);
  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    // Report "no match" to the caller instead of terminating the process.
    return env->NewStringUTF("");
  }

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);

  std::string name = manager->Search(p, threshold);

  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

  return env->NewStringUTF(name.c_str());
}
|
||||
|
||||
// JNI: verifies `embedding` against the speaker called `name` in the manager
// behind `ptr`.
//
// @param ptr       Handle of the native SpeakerEmbeddingManager.
// @param name      Name of the speaker to verify against.
// @param embedding Query embedding; must have manager->Dim() elements.
// @param threshold Forwarded unchanged to SpeakerEmbeddingManager::Verify().
// @return The result of SpeakerEmbeddingManager::Verify(), or false (after
//         logging) if `embedding` has the wrong number of elements.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_verify(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
    jfloatArray embedding, jfloat threshold) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  // Check the length before acquiring the elements so that the failure path
  // has nothing to release.
  jsize n = env->GetArrayLength(embedding);
  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    // Report the failure to the caller instead of terminating the process.
    return false;
  }

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Verify(p_name, p, threshold);

  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}
|
||||
|
||||
// JNI: returns whether the manager behind `ptr` has a speaker called `name`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_contains(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jlong ptr,
                                                            jstring name) {
  auto *manager =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  const char *utf_name = env->GetStringUTFChars(name, nullptr);
  jboolean found = manager->Contains(utf_name);
  env->ReleaseStringUTFChars(name, utf_name);

  return found;
}
|
||||
|
||||
// JNI: returns the number of speakers held by the manager behind `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_numSpeakers(JNIEnv *env,
                                                               jobject /*obj*/,
                                                               jlong ptr) {
  using Manager = sherpa_onnx::SpeakerEmbeddingManager;
  return reinterpret_cast<Manager *>(ptr)->NumSpeakers();
}
|
||||
|
||||
// JNI: returns the names of all speakers held by the manager behind `ptr` as
// a Java String[], in the order returned by
// SpeakerEmbeddingManager::GetAllSpeakers().
SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  std::vector<std::string> all_speakers = manager->GetAllSpeakers();

  jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
      all_speakers.size(), env->FindClass("java/lang/String"), nullptr);

  int32_t i = 0;
  for (const auto &s : all_speakers) {
    jstring js = env->NewStringUTF(s.c_str());
    env->SetObjectArrayElement(obj_arr, i, js);

    // The array now holds its own reference to the string. Drop the local
    // one so that a large number of speakers cannot exhaust the local
    // reference table.
    env->DeleteLocalRef(js);

    ++i;
  }

  return obj_arr;
}
|
||||
|
||||
SHERPA_ONNX_EXTERN_C
|
||||
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(
|
||||
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
|
||||
@@ -783,10 +1192,6 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(
|
||||
auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
|
||||
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
|
||||
|
||||
if (!config.Validate()) {
|
||||
SHERPA_ONNX_LOGE("Erros found in config!");
|
||||
}
|
||||
|
||||
auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(
|
||||
#if __ANDROID_API__ >= 9
|
||||
mgr,
|
||||
@@ -801,6 +1206,11 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile(
|
||||
JNIEnv *env, jobject /*obj*/, jobject _config) {
|
||||
auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
|
||||
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
|
||||
|
||||
if (!config.Validate()) {
|
||||
SHERPA_ONNX_LOGE("Errors found in config!");
|
||||
}
|
||||
|
||||
auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config);
|
||||
|
||||
return (jlong)tts;
|
||||
|
||||
@@ -17,6 +17,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) {
|
||||
.def(py::init<int32_t>(), py::arg("dim"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def_property_readonly("num_speakers", &PyClass::NumSpeakers)
|
||||
.def_property_readonly("dim", &PyClass::Dim)
|
||||
.def_property_readonly("all_speakers", &PyClass::GetAllSpeakers)
|
||||
.def(
|
||||
"__contains__",
|
||||
[](const PyClass &self, const std::string &name) -> bool {
|
||||
return self.Contains(name);
|
||||
},
|
||||
py::arg("name"), py::call_guard<py::gil_scoped_release>())
|
||||
.def(
|
||||
"add",
|
||||
[](const PyClass &self, const std::string &name,
|
||||
@@ -25,6 +33,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) {
|
||||
},
|
||||
py::arg("name"), py::arg("v"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def(
|
||||
"add",
|
||||
[](const PyClass &self, const std::string &name,
|
||||
const std::vector<std::vector<float>> &embedding_list) -> bool {
|
||||
return self.Add(name, embedding_list);
|
||||
},
|
||||
py::arg("name"), py::arg("embedding_list"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def(
|
||||
"remove",
|
||||
[](const PyClass &self, const std::string &name) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user