Add speaker diarization API for HarmonyOS. (#1609)

This commit is contained in:
Fangjun Kuang
2024-12-10 16:03:03 +08:00
committed by GitHub
parent 14944d8c81
commit 1bae4085ca
18 changed files with 279 additions and 79 deletions

View File

@@ -1784,8 +1784,8 @@ struct SherpaOnnxOfflineSpeakerDiarizationResult {
sherpa_onnx::OfflineSpeakerDiarizationResult impl;
};
const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarization(
static sherpa_onnx::OfflineSpeakerDiarizationConfig
GetOfflineSpeakerDiarizationConfig(
const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config;
@@ -1820,6 +1820,22 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
sd_config.min_duration_off = SHERPA_ONNX_OR(config->min_duration_off, 0.5);
if (sd_config.segmentation.debug || sd_config.embedding.debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", sd_config.ToString().c_str());
#else
SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
#endif
}
return sd_config;
}
const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarization(
const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
auto sd_config = GetOfflineSpeakerDiarizationConfig(config);
if (!sd_config.Validate()) {
SHERPA_ONNX_LOGE("Errors in config");
return nullptr;
@@ -1831,10 +1847,6 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
sd->impl =
std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(sd_config);
if (sd_config.segmentation.debug || sd_config.embedding.debug) {
SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
}
return sd;
}
@@ -2029,5 +2041,32 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
}
#endif // #if SHERPA_ONNX_ENABLE_TTS == 1
//
#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
// Creates an offline speaker diarization handle on HarmonyOS (OHOS).
//
// @param config  C-API diarization configuration; it is converted with
//                GetOfflineSpeakerDiarizationConfig().
// @param mgr     Resource manager forwarded to
//                sherpa_onnx::OfflineSpeakerDiarization. When it is null,
//                creation is delegated to
//                SherpaOnnxCreateOfflineSpeakerDiarization(config).
// @return The newly created handle, or nullptr if the converted config fails
//         Validate().
const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    // No resource manager supplied: use the regular creation routine.
    return SherpaOnnxCreateOfflineSpeakerDiarization(config);
  }

  auto diarization_config = GetOfflineSpeakerDiarizationConfig(config);

  const bool config_ok = diarization_config.Validate();
  if (!config_ok) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxOfflineSpeakerDiarization;
  handle->impl = std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(
      mgr, diarization_config);

  return handle;
}
#endif // #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
#endif // #ifdef __OHOS__

View File

@@ -1577,6 +1577,11 @@ SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
NativeResourceManager *mgr);
// Creates an offline speaker diarization object on HarmonyOS (OHOS).
//
// @param config  Diarization configuration.
// @param mgr     Resource manager. The definition falls back to
//                SherpaOnnxCreateOfflineSpeakerDiarization(config) when this
//                is null.
// @return The created object, or nullptr when the config fails validation.
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
NativeResourceManager *mgr);
#endif
#if defined(__GNUC__)

View File

@@ -6,6 +6,15 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h"
@@ -23,10 +32,10 @@ OfflineSpeakerDiarizationImpl::Create(
return nullptr;
}
#if __ANDROID_API__ >= 9
template <typename Manager>
std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) {
Manager *mgr, const OfflineSpeakerDiarizationConfig &config) {
if (!config.segmentation.pyannote.model.empty()) {
return std::make_unique<OfflineSpeakerDiarizationPyannoteImpl>(mgr, config);
}
@@ -35,6 +44,17 @@ OfflineSpeakerDiarizationImpl::Create(
return nullptr;
}
// Explicit instantiations of the templated Create(Manager *, config)
// overload, one per supported manager type. Each is compiled only on the
// platform whose guard macro is set.

// Android: Manager = AAssetManager.
#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif
// HarmonyOS: Manager = NativeResourceManager.
#if __OHOS__
template std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif
} // namespace sherpa_onnx

View File

@@ -8,11 +8,6 @@
#include <functional>
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
namespace sherpa_onnx {
@@ -21,10 +16,9 @@ class OfflineSpeakerDiarizationImpl {
static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
const OfflineSpeakerDiarizationConfig &config);
#if __ANDROID_API__ >= 9
template <typename Manager>
static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif
Manager *mgr, const OfflineSpeakerDiarizationConfig &config);
virtual ~OfflineSpeakerDiarizationImpl() = default;

View File

@@ -11,11 +11,6 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "Eigen/Dense"
#include "sherpa-onnx/csrc/fast-clustering.h"
#include "sherpa-onnx/csrc/math.h"
@@ -71,16 +66,15 @@ class OfflineSpeakerDiarizationPyannoteImpl
Init();
}
#if __ANDROID_API__ >= 9
template <typename Manager>
OfflineSpeakerDiarizationPyannoteImpl(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config)
Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
: config_(config),
segmentation_model_(mgr, config_.segmentation),
embedding_extractor_(mgr, config_.embedding),
clustering_(std::make_unique<FastClustering>(config_.clustering)) {
Init();
}
#endif
int32_t SampleRate() const override {
const auto &meta_data = segmentation_model_.GetModelMetaData();
@@ -213,8 +207,13 @@ class OfflineSpeakerDiarizationPyannoteImpl
}
}
} else {
#if __OHOS__
SHERPA_ONNX_LOGE(
"powerset_max_classes = %{public}d is currently not supported!", i);
#else
SHERPA_ONNX_LOGE(
"powerset_max_classes = %d is currently not supported!", i);
#endif
SHERPA_ONNX_EXIT(-1);
}
}
@@ -229,10 +228,17 @@ class OfflineSpeakerDiarizationPyannoteImpl
int32_t window_shift = meta_data.window_shift;
if (n <= 0) {
#if __OHOS__
SHERPA_ONNX_LOGE(
"number of audio samples is %{public}d (<= 0). Please provide a "
"positive number",
n);
#else
SHERPA_ONNX_LOGE(
"number of audio samples is %d (<= 0). Please provide a positive "
"number",
n);
#endif
return {};
}

View File

@@ -7,6 +7,15 @@
#include <string>
#include <utility>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h"
namespace sherpa_onnx {
@@ -74,11 +83,10 @@ OfflineSpeakerDiarization::OfflineSpeakerDiarization(
const OfflineSpeakerDiarizationConfig &config)
: impl_(OfflineSpeakerDiarizationImpl::Create(config)) {}
#if __ANDROID_API__ >= 9
template <typename Manager>
OfflineSpeakerDiarization::OfflineSpeakerDiarization(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config)
Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
: impl_(OfflineSpeakerDiarizationImpl::Create(mgr, config)) {}
#endif
OfflineSpeakerDiarization::~OfflineSpeakerDiarization() = default;
@@ -98,4 +106,14 @@ OfflineSpeakerDiarizationResult OfflineSpeakerDiarization::Process(
return impl_->Process(audio, n, std::move(callback), callback_arg);
}
// Explicit instantiations of the templated
// OfflineSpeakerDiarization(Manager *, config) constructor, one per supported
// manager type. Each is compiled only on the platform whose guard macro is
// set.

// Android: Manager = AAssetManager.
#if __ANDROID_API__ >= 9
template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif
// HarmonyOS: Manager = NativeResourceManager.
#if __OHOS__
template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif
} // namespace sherpa_onnx

View File

@@ -9,11 +9,6 @@
#include <memory>
#include <string>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/fast-clustering-config.h"
#include "sherpa-onnx/csrc/offline-speaker-diarization-result.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
@@ -62,10 +57,9 @@ class OfflineSpeakerDiarization {
explicit OfflineSpeakerDiarization(
const OfflineSpeakerDiarizationConfig &config);
#if __ANDROID_API__ >= 9
OfflineSpeakerDiarization(AAssetManager *mgr,
template <typename Manager>
OfflineSpeakerDiarization(Manager *mgr,
const OfflineSpeakerDiarizationConfig &config);
#endif
~OfflineSpeakerDiarization();

View File

@@ -8,6 +8,15 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
@@ -24,8 +33,8 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
Init(buf.data(), buf.size());
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
template <typename Manager>
Impl(Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
@@ -33,7 +42,6 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
auto buf = ReadFile(mgr, config_.pyannote.model);
Init(buf.data(), buf.size());
}
#endif
const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData()
const {
@@ -61,7 +69,11 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
if (config_.debug) {
std::ostringstream os;
PrintModelMetadata(os, meta_data);
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
}
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
@@ -103,12 +115,11 @@ OfflineSpeakerSegmentationPyannoteModel::
const OfflineSpeakerSegmentationModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
template <typename Manager>
OfflineSpeakerSegmentationPyannoteModel::
OfflineSpeakerSegmentationPyannoteModel(
AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineSpeakerSegmentationPyannoteModel::
~OfflineSpeakerSegmentationPyannoteModel() = default;
@@ -123,4 +134,18 @@ Ort::Value OfflineSpeakerSegmentationPyannoteModel::Forward(
return impl_->Forward(std::move(x));
}
// Explicit instantiations of the templated
// OfflineSpeakerSegmentationPyannoteModel(Manager *, config) constructor, one
// per supported manager type. Each is compiled only on the platform whose
// guard macro is set.

// Android: Manager = AAssetManager.
#if __ANDROID_API__ >= 9
template OfflineSpeakerSegmentationPyannoteModel::
OfflineSpeakerSegmentationPyannoteModel(
AAssetManager *mgr,
const OfflineSpeakerSegmentationModelConfig &config);
#endif
// HarmonyOS: Manager = NativeResourceManager.
#if __OHOS__
template OfflineSpeakerSegmentationPyannoteModel::
OfflineSpeakerSegmentationPyannoteModel(
NativeResourceManager *mgr,
const OfflineSpeakerSegmentationModelConfig &config);
#endif
} // namespace sherpa_onnx

View File

@@ -6,11 +6,6 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h"
@@ -22,10 +17,9 @@ class OfflineSpeakerSegmentationPyannoteModel {
explicit OfflineSpeakerSegmentationPyannoteModel(
const OfflineSpeakerSegmentationModelConfig &config);
#if __ANDROID_API__ >= 9
template <typename Manager>
OfflineSpeakerSegmentationPyannoteModel(
AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config);
#endif
Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config);
~OfflineSpeakerSegmentationPyannoteModel();