Add speaker identification APIs for HarmonyOS (#1607)

* Add speaker embedding extractor API for HarmonyOS

* Add ArkTS API for speaker identification
This commit is contained in:
Fangjun Kuang
2024-12-09 19:23:18 +08:00
committed by GitHub
parent a743a4400f
commit 314545f938
19 changed files with 374 additions and 60 deletions

2
.gitignore vendored
View File

@@ -123,3 +123,5 @@ sherpa-onnx-online-punct-en-2024-08-06
sherpa-onnx-pyannote-segmentation-3-0 sherpa-onnx-pyannote-segmentation-3-0
sherpa-onnx-moonshine-tiny-en-int8 sherpa-onnx-moonshine-tiny-en-int8
sherpa-onnx-moonshine-base-en-int8 sherpa-onnx-moonshine-base-en-int8
harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md

View File

@@ -51,3 +51,9 @@ export {
TtsOutput, TtsOutput,
TtsInput, TtsInput,
} from './src/main/ets/components/NonStreamingTts'; } from './src/main/ets/components/NonStreamingTts';
export {
SpeakerEmbeddingExtractorConfig,
SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager,
} from './src/main/ets/components/SpeakerIdentification';

View File

@@ -11,6 +11,17 @@
static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor> static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env(); Napi::Env env = info.Env();
#if __OHOS__
if (info.Length() != 2) {
std::ostringstream os;
os << "Expect only 2 arguments. Given: " << info.Length();
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
return {};
}
#else
if (info.Length() != 1) { if (info.Length() != 1) {
std::ostringstream os; std::ostringstream os;
os << "Expect only 1 argument. Given: " << info.Length(); os << "Expect only 1 argument. Given: " << info.Length();
@@ -19,6 +30,7 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
return {}; return {};
} }
#endif
if (!info[0].IsObject()) { if (!info[0].IsObject()) {
Napi::TypeError::New(env, "You should pass an object as the only argument.") Napi::TypeError::New(env, "You should pass an object as the only argument.")
@@ -46,8 +58,18 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
#if __OHOS__
std::unique_ptr<NativeResourceManager,
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
&OH_ResourceManager_ReleaseNativeResourceManager);
const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(&c, mgr.get());
#else
const SherpaOnnxSpeakerEmbeddingExtractor *extractor = const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
SherpaOnnxCreateSpeakerEmbeddingExtractor(&c); SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
#endif
if (c.model) { if (c.model) {
delete[] c.model; delete[] c.model;

View File

@@ -47,3 +47,18 @@ export type TtsOutput = {
export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput; export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>; export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
// Native bindings for the speaker embedding extractor.
// `config` is the extractor configuration object; `mgr` is optional.
// NOTE(review): on HarmonyOS the native wrapper checks for exactly two
// arguments and passes the second one to
// OH_ResourceManager_InitNativeResourceManager - confirm what callers should
// pass when no resource manager is available.
export const createSpeakerEmbeddingExtractor: (config: object, mgr?: object) => object;
export const speakerEmbeddingExtractorDim: (handle: object) => number;
export const speakerEmbeddingExtractorCreateStream: (handle: object) => object;
export const speakerEmbeddingExtractorIsReady: (handle: object, stream: object) => boolean;
export const speakerEmbeddingExtractorComputeEmbedding: (handle: object, stream: object, enableExternalBuffer: boolean) => Float32Array;
// Native bindings for the speaker embedding manager. `handle` is the object
// returned by createSpeakerEmbeddingManager().
export const createSpeakerEmbeddingManager: (dim: number) => object;
export const speakerEmbeddingManagerAdd: (handle: object, speaker: {name: string, v: Float32Array}) => boolean;
// `vv` holds `n` embeddings concatenated into a single Float32Array.
export const speakerEmbeddingManagerAddListFlattened: (handle: object, speaker: {name: string, vv: Float32Array, n: number}) => boolean;
export const speakerEmbeddingManagerRemove: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerSearch: (handle: object, obj: {v: Float32Array, threshold: number}) => string;
export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, v: Float32Array, threshold: number}) => boolean;
export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;

View File

@@ -4,7 +4,7 @@ import {
getOfflineTtsSampleRate, getOfflineTtsSampleRate,
offlineTtsGenerate, offlineTtsGenerate,
offlineTtsGenerateAsync, offlineTtsGenerateAsync,
} from "libsherpa_onnx.so"; } from 'libsherpa_onnx.so';
export class OfflineTtsVitsModelConfig { export class OfflineTtsVitsModelConfig {
public model: string = ''; public model: string = '';

View File

@@ -0,0 +1,139 @@
import {
createSpeakerEmbeddingExtractor,
createSpeakerEmbeddingManager,
speakerEmbeddingExtractorComputeEmbedding,
speakerEmbeddingExtractorCreateStream,
speakerEmbeddingExtractorDim,
speakerEmbeddingExtractorIsReady,
speakerEmbeddingManagerAdd,
speakerEmbeddingManagerAddListFlattened,
speakerEmbeddingManagerContains,
speakerEmbeddingManagerGetAllSpeakers,
speakerEmbeddingManagerNumSpeakers,
speakerEmbeddingManagerRemove,
speakerEmbeddingManagerSearch,
speakerEmbeddingManagerVerify
} from 'libsherpa_onnx.so';
import { OnlineStream } from './StreamingAsr';
/**
 * Options used to build a SpeakerEmbeddingExtractor.
 *
 * The instance is passed as-is to the native
 * createSpeakerEmbeddingExtractor() binding.
 */
export class SpeakerEmbeddingExtractorConfig {
// Model to load; empty by default.
// NOTE(review): presumably the path of the ONNX speaker embedding model - confirm.
public model: string = '';
// NOTE(review): presumably the number of threads used for inference - confirm.
public numThreads: number = 1;
// When true, the native layer logs the parsed config and the model metadata.
public debug: boolean = false;
// NOTE(review): presumably the ONNX Runtime execution provider - confirm.
public provider: string = 'cpu';
}
/**
 * ArkTS wrapper around the native speaker embedding extractor.
 *
 * The constructor creates the native extractor and caches the embedding
 * dimension it reports. Audio is fed through an OnlineStream obtained from
 * createStream().
 */
export class SpeakerEmbeddingExtractor {
  /** Configuration this extractor was created with. */
  public config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();

  /** Embedding dimension reported by the native extractor. */
  public dim: number;

  /** Opaque native extractor object returned by the binding. */
  private handle: object;

  /**
   * @param config Extractor options, forwarded unchanged to the native layer.
   * @param mgr Optional object forwarded as the second native argument.
   *   NOTE(review): presumably the app's resource manager, used to read the
   *   model from rawfile resources - confirm.
   */
  constructor(config: SpeakerEmbeddingExtractorConfig, mgr?: object) {
    const nativeHandle = createSpeakerEmbeddingExtractor(config, mgr);
    this.handle = nativeHandle;
    this.config = config;
    this.dim = speakerEmbeddingExtractorDim(nativeHandle);
  }

  /** Creates a new stream bound to this extractor. */
  createStream(): OnlineStream {
    const streamHandle = speakerEmbeddingExtractorCreateStream(this.handle);
    return new OnlineStream(streamHandle);
  }

  /**
   * Asks the native extractor whether the stream is ready.
   *
   * @param stream Stream created by createStream().
   */
  isReady(stream: OnlineStream): boolean {
    const ready = speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
    return ready;
  }

  /**
   * Computes the embedding for the given stream.
   *
   * @param stream Stream created by createStream().
   * @param enableExternalBuffer Forwarded to the native binding; defaults to
   *   true. NOTE(review): presumably controls whether the returned array wraps
   *   a native buffer instead of copying it - confirm.
   * @returns The embedding produced by the native extractor.
   */
  compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
    const embedding = speakerEmbeddingExtractorComputeEmbedding(
      this.handle,
      stream.handle,
      enableExternalBuffer,
    );
    return embedding;
  }
}
/**
 * Joins a list of Float32Array chunks into one contiguous Float32Array.
 *
 * Chunks are copied in list order, so the result's length is the sum of the
 * chunk lengths. An empty list yields an empty array.
 *
 * @param arrayList Chunks to join; they are read but not modified.
 * @returns A newly allocated array holding every chunk back to back.
 */
function flatten(arrayList: Float32Array[]): Float32Array {
  // First pass: total size, so the output is allocated exactly once.
  let total = 0;
  for (const chunk of arrayList) {
    total += chunk.length;
  }

  // Second pass: copy each chunk at its running offset.
  const merged = new Float32Array(total);
  let writePos = 0;
  for (const chunk of arrayList) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
/** Argument of SpeakerEmbeddingManager.add(): a speaker name with one embedding. */
interface SpeakerNameWithEmbedding {
// Name of the speaker.
name: string;
// One embedding for that speaker.
// NOTE(review): presumably its length must equal the manager's dim - confirm.
v: Float32Array;
}
/** Argument of SpeakerEmbeddingManager.addMulti(): a speaker name with several embeddings. */
interface SpeakerNameWithEmbeddingList {
// Name of the speaker.
name: string;
// One Float32Array per embedding; addMulti() concatenates them before calling the native layer.
v: Float32Array[];
}
/** Flattened form of a multi-embedding speaker, as passed to speakerEmbeddingManagerAddListFlattened(). */
interface SpeakerNameWithEmbeddingN {
// Name of the speaker.
name: string;
// All embeddings concatenated into a single array.
vv: Float32Array;
// Number of embeddings stored in vv.
n: number;
}
/** Argument of SpeakerEmbeddingManager.search(). */
interface EmbeddingWithThreshold {
// Embedding to look up.
v: Float32Array;
// NOTE(review): presumably the minimum similarity score required for a match - confirm.
threshold: number;
}
/** Argument of SpeakerEmbeddingManager.verify(). */
interface SpeakerNameEmbeddingThreshold {
// Name of the speaker to verify against.
name: string;
// Embedding to check.
v: Float32Array;
// NOTE(review): presumably the minimum similarity score required to accept - confirm.
threshold: number;
}
/**
 * ArkTS wrapper around the native speaker embedding manager.
 *
 * Every method forwards to the matching native binding, passing the handle
 * created in the constructor.
 */
export class SpeakerEmbeddingManager {
  /** Embedding dimension this manager was created with. */
  public dim: number;

  /** Opaque native manager object returned by the binding. */
  private handle: object;

  /**
   * @param dim Embedding dimension handed to the native manager.
   */
  constructor(dim: number) {
    this.dim = dim;
    this.handle = createSpeakerEmbeddingManager(dim);
  }

  /**
   * Registers a speaker with a single embedding.
   *
   * @returns The boolean reported by the native layer.
   */
  add(speaker: SpeakerNameWithEmbedding): boolean {
    const added = speakerEmbeddingManagerAdd(this.handle, speaker);
    return added;
  }

  /**
   * Registers a speaker with several embeddings.
   *
   * The embeddings are concatenated into one array and sent together with
   * their count, which is the layout the native binding takes.
   *
   * @returns The boolean reported by the native layer.
   */
  addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
    const flattened: SpeakerNameWithEmbeddingN = {
      name: speaker.name,
      vv: flatten(speaker.v),
      n: speaker.v.length,
    };

    const added = speakerEmbeddingManagerAddListFlattened(this.handle, flattened);
    return added;
  }

  /**
   * Removes the speaker registered under the given name.
   *
   * @returns The boolean reported by the native layer.
   */
  remove(name: string): boolean {
    const removed = speakerEmbeddingManagerRemove(this.handle, name);
    return removed;
  }

  /**
   * Searches the registered speakers for the given embedding.
   *
   * @returns The string reported by the native layer.
   *   NOTE(review): presumably the matched speaker's name, or an empty string
   *   when nothing matches - confirm.
   */
  search(obj: EmbeddingWithThreshold): string {
    const found = speakerEmbeddingManagerSearch(this.handle, obj);
    return found;
  }

  /**
   * Checks the given embedding against the named speaker.
   *
   * @returns The boolean reported by the native layer.
   */
  verify(obj: SpeakerNameEmbeddingThreshold): boolean {
    const verified = speakerEmbeddingManagerVerify(this.handle, obj);
    return verified;
  }

  /** Reports whether a speaker with the given name is registered. */
  contains(name: string): boolean {
    const present = speakerEmbeddingManagerContains(this.handle, name);
    return present;
  }

  /** Returns the number of registered speakers. */
  getNumSpeakers(): number {
    const count = speakerEmbeddingManagerNumSpeakers(this.handle);
    return count;
  }

  /** Returns the names of all registered speakers. */
  getAllSpeakerNames(): string[] {
    const names = speakerEmbeddingManagerGetAllSpeakers(this.handle);
    return names;
  }
}

View File

@@ -1328,8 +1328,8 @@ struct SherpaOnnxSpeakerEmbeddingExtractor {
std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl; std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl;
}; };
const SherpaOnnxSpeakerEmbeddingExtractor * static sherpa_onnx::SpeakerEmbeddingExtractorConfig
SherpaOnnxCreateSpeakerEmbeddingExtractor( GetSpeakerEmbeddingExtractorConfig(
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) { const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
sherpa_onnx::SpeakerEmbeddingExtractorConfig c; sherpa_onnx::SpeakerEmbeddingExtractorConfig c;
c.model = SHERPA_ONNX_OR(config->model, ""); c.model = SHERPA_ONNX_OR(config->model, "");
@@ -1342,9 +1342,21 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
} }
if (config->debug) { if (config->debug) {
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
#else
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
#endif
} }
return c;
}
const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractor(
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
auto c = GetSpeakerEmbeddingExtractorConfig(config);
if (!c.Validate()) { if (!c.Validate()) {
SHERPA_ONNX_LOGE("Errors in config!"); SHERPA_ONNX_LOGE("Errors in config!");
return nullptr; return nullptr;
@@ -1983,6 +1995,23 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS(
return p; return p;
} }
// Creates a speaker embedding extractor on HarmonyOS.
//
// With a null `mgr` this defers to SherpaOnnxCreateSpeakerEmbeddingExtractor().
// Otherwise the extractor is constructed from `mgr` and the converted config.
// Note that this path does not call Validate() on the config, unlike the
// file-based path.
const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    // No resource manager: use the regular creation path.
    return SherpaOnnxCreateSpeakerEmbeddingExtractor(config);
  }

  auto extractor_config = GetSpeakerEmbeddingExtractorConfig(config);

  auto *extractor = new SherpaOnnxSpeakerEmbeddingExtractor;
  extractor->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(
      mgr, extractor_config);

  return extractor;
}
#if SHERPA_ONNX_ENABLE_TTS == 1 #if SHERPA_ONNX_ENABLE_TTS == 1
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) { const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {

View File

@@ -1572,6 +1572,11 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr); const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
// Create a speaker embedding extractor on HarmonyOS.
//
// @param config  Configuration of the extractor.
// @param mgr     Native resource manager. If it is NULL, this function behaves
//                like SherpaOnnxCreateSpeakerEmbeddingExtractor(config).
// @return Return a pointer to the created extractor.
//         NOTE(review): presumably the caller must free it with the matching
//         destroy function - confirm.
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
NativeResourceManager *mgr);
#endif #endif
#if defined(__GNUC__) #if defined(__GNUC__)

View File

@@ -62,9 +62,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
for (const auto &f : files) { for (const auto &f : files) {
if (config.model.debug) { if (config.model.debug) {
#if __OHOS__ #if __OHOS__
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#else
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str()); SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif #endif
} }
std::unique_ptr<fst::FarReader<fst::StdArc>> reader( std::unique_ptr<fst::FarReader<fst::StdArc>> reader(

View File

@@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorGeneralImpl
const SpeakerEmbeddingExtractorConfig &config) const SpeakerEmbeddingExtractorConfig &config)
: model_(config) {} : model_(config) {}
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorGeneralImpl( SpeakerEmbeddingExtractorGeneralImpl(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: model_(mgr, config) {} : model_(mgr, config) {}
#endif
int32_t Dim() const override { return model_.GetMetaData().output_dim; } int32_t Dim() const override { return model_.GetMetaData().output_dim; }
@@ -46,9 +45,15 @@ class SpeakerEmbeddingExtractorGeneralImpl
std::vector<float> Compute(OnlineStream *s) const override { std::vector<float> Compute(OnlineStream *s) const override {
int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames();
if (num_frames <= 0) { if (num_frames <= 0) {
#if __OHOS__
SHERPA_ONNX_LOGE(
"Please make sure IsReady(s) returns true. num_frames: %{public}d",
num_frames);
#else
SHERPA_ONNX_LOGE( SHERPA_ONNX_LOGE(
"Please make sure IsReady(s) returns true. num_frames: %d", "Please make sure IsReady(s) returns true. num_frames: %d",
num_frames); num_frames);
#endif
return {}; return {};
} }
@@ -64,8 +69,13 @@ class SpeakerEmbeddingExtractorGeneralImpl
if (meta_data.feature_normalize_type == "global-mean") { if (meta_data.feature_normalize_type == "global-mean") {
SubtractGlobalMean(features.data(), num_frames, feat_dim); SubtractGlobalMean(features.data(), num_frames, feat_dim);
} else { } else {
#if __OHOS__
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s",
meta_data.feature_normalize_type.c_str());
#else
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s",
meta_data.feature_normalize_type.c_str()); meta_data.feature_normalize_type.c_str());
#endif
exit(-1); exit(-1);
} }
} }

View File

@@ -3,6 +3,15 @@
// Copyright (c) 2024 Xiaomi Corporation // Copyright (c) 2024 Xiaomi Corporation
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h"
@@ -35,7 +44,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
if (debug) { if (debug) {
std::ostringstream os; std::ostringstream os;
PrintModelMetadata(os, meta_data); PrintModelMetadata(os, meta_data);
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str()); SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
} }
Ort::AllocatorWithDefaultOptions allocator; Ort::AllocatorWithDefaultOptions allocator;
@@ -59,7 +72,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
} else if (model_type == "nemo") { } else if (model_type == "nemo") {
return ModelType::kNeMo; return ModelType::kNeMo;
} else { } else {
#if __OHOS__
SHERPA_ONNX_LOGE("Unsupported model_type: %{public}s", model_type.c_str());
#else
SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str()); SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
#endif
return ModelType::kUnknown; return ModelType::kUnknown;
} }
} }
@@ -91,10 +108,10 @@ SpeakerEmbeddingExtractorImpl::Create(
return nullptr; return nullptr;
} }
#if __ANDROID_API__ >= 9 template <typename Manager>
std::unique_ptr<SpeakerEmbeddingExtractorImpl> std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create( SpeakerEmbeddingExtractorImpl::Create(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) { Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) {
ModelType model_type = ModelType::kUnknown; ModelType model_type = ModelType::kUnknown;
{ {
@@ -120,6 +137,17 @@ SpeakerEmbeddingExtractorImpl::Create(
// unreachable code // unreachable code
return nullptr; return nullptr;
} }
#if __ANDROID_API__ >= 9
template std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
#if __OHOS__
template std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif #endif
} // namespace sherpa_onnx } // namespace sherpa_onnx

View File

@@ -9,11 +9,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
namespace sherpa_onnx { namespace sherpa_onnx {
@@ -25,10 +20,9 @@ class SpeakerEmbeddingExtractorImpl {
static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#if __ANDROID_API__ >= 9 template <typename Manager>
static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); Manager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
virtual int32_t Dim() const = 0; virtual int32_t Dim() const = 0;

View File

@@ -8,6 +8,15 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h" #include "sherpa-onnx/csrc/session.h"
@@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorModel::Impl {
} }
} }
#if __ANDROID_API__ >= 9 template <typename Manager>
Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: config_(config), : config_(config),
env_(ORT_LOGGING_LEVEL_ERROR), env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)), sess_opts_(GetSessionOptions(config)),
@@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorModel::Impl {
Init(buf.data(), buf.size()); Init(buf.data(), buf.size());
} }
} }
#endif
Ort::Value Compute(Ort::Value x) const { Ort::Value Compute(Ort::Value x) const {
std::array<Ort::Value, 1> inputs = {std::move(x)}; std::array<Ort::Value, 1> inputs = {std::move(x)};
@@ -68,7 +76,11 @@ class SpeakerEmbeddingExtractorModel::Impl {
if (config_.debug) { if (config_.debug) {
std::ostringstream os; std::ostringstream os;
PrintModelMetadata(os, meta_data); PrintModelMetadata(os, meta_data);
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str()); SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
} }
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
@@ -84,8 +96,14 @@ class SpeakerEmbeddingExtractorModel::Impl {
std::string framework; std::string framework;
SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); SHERPA_ONNX_READ_META_DATA_STR(framework, "framework");
if (framework != "wespeaker" && framework != "3d-speaker") { if (framework != "wespeaker" && framework != "3d-speaker") {
#if __OHOS__
SHERPA_ONNX_LOGE(
"Expect a wespeaker or a 3d-speaker model, given: %{public}s",
framework.c_str());
#else
SHERPA_ONNX_LOGE("Expect a wespeaker or a 3d-speaker model, given: %s", SHERPA_ONNX_LOGE("Expect a wespeaker or a 3d-speaker model, given: %s",
framework.c_str()); framework.c_str());
#endif
exit(-1); exit(-1);
} }
} }
@@ -111,11 +129,10 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
const SpeakerEmbeddingExtractorConfig &config) const SpeakerEmbeddingExtractorConfig &config)
: impl_(std::make_unique<Impl>(config)) {} : impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {} : impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default;
@@ -128,4 +145,14 @@ Ort::Value SpeakerEmbeddingExtractorModel::Compute(Ort::Value x) const {
return impl_->Compute(std::move(x)); return impl_->Compute(std::move(x));
} }
#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
#if __OHOS__
template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
} // namespace sherpa_onnx } // namespace sherpa_onnx

View File

@@ -6,11 +6,6 @@
#include <memory> #include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
@@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorModel {
explicit SpeakerEmbeddingExtractorModel( explicit SpeakerEmbeddingExtractorModel(
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorModel(AAssetManager *mgr, SpeakerEmbeddingExtractorModel(Manager *mgr,
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#endif
~SpeakerEmbeddingExtractorModel(); ~SpeakerEmbeddingExtractorModel();

View File

@@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
const SpeakerEmbeddingExtractorConfig &config) const SpeakerEmbeddingExtractorConfig &config)
: model_(config) {} : model_(config) {}
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorNeMoImpl( SpeakerEmbeddingExtractorNeMoImpl(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: model_(mgr, config) {} : model_(mgr, config) {}
#endif
int32_t Dim() const override { return model_.GetMetaData().output_dim; } int32_t Dim() const override { return model_.GetMetaData().output_dim; }
@@ -54,9 +53,15 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
std::vector<float> Compute(OnlineStream *s) const override { std::vector<float> Compute(OnlineStream *s) const override {
int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames();
if (num_frames <= 0) { if (num_frames <= 0) {
#if __OHOS__
SHERPA_ONNX_LOGE(
"Please make sure IsReady(s) returns true. num_frames: %{public}d",
num_frames);
#else
SHERPA_ONNX_LOGE( SHERPA_ONNX_LOGE(
"Please make sure IsReady(s) returns true. num_frames: %d", "Please make sure IsReady(s) returns true. num_frames: %d",
num_frames); num_frames);
#endif
return {}; return {};
} }
@@ -72,8 +77,14 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
if (meta_data.feature_normalize_type == "per_feature") { if (meta_data.feature_normalize_type == "per_feature") {
NormalizePerFeature(features.data(), num_frames, feat_dim); NormalizePerFeature(features.data(), num_frames, feat_dim);
} else { } else {
#if __OHOS__
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s",
meta_data.feature_normalize_type.c_str());
#else
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s",
meta_data.feature_normalize_type.c_str()); meta_data.feature_normalize_type.c_str());
#endif
exit(-1); exit(-1);
} }
} }

View File

@@ -8,6 +8,15 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h" #include "sherpa-onnx/csrc/session.h"
@@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
} }
} }
#if __ANDROID_API__ >= 9 template <typename Manager>
Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: config_(config), : config_(config),
env_(ORT_LOGGING_LEVEL_ERROR), env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)), sess_opts_(GetSessionOptions(config)),
@@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
Init(buf.data(), buf.size()); Init(buf.data(), buf.size());
} }
} }
#endif
Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const {
std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};
@@ -73,7 +81,11 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
if (config_.debug) { if (config_.debug) {
std::ostringstream os; std::ostringstream os;
PrintModelMetadata(os, meta_data); PrintModelMetadata(os, meta_data);
#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str()); SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
} }
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
@@ -93,7 +105,12 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
std::string framework; std::string framework;
SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); SHERPA_ONNX_READ_META_DATA_STR(framework, "framework");
if (framework != "nemo") { if (framework != "nemo") {
#if __OHOS__
SHERPA_ONNX_LOGE("Expect a NeMo model, given: %{public}s",
framework.c_str());
#else
SHERPA_ONNX_LOGE("Expect a NeMo model, given: %s", framework.c_str()); SHERPA_ONNX_LOGE("Expect a NeMo model, given: %s", framework.c_str());
#endif
exit(-1); exit(-1);
} }
} }
@@ -119,11 +136,10 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
const SpeakerEmbeddingExtractorConfig &config) const SpeakerEmbeddingExtractorConfig &config)
: impl_(std::make_unique<Impl>(config)) {} : impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {} : impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() =
default; default;
@@ -142,4 +158,14 @@ OrtAllocator *SpeakerEmbeddingExtractorNeMoModel::Allocator() const {
return impl_->Allocator(); return impl_->Allocator();
} }
#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
#if __OHOS__
template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
} // namespace sherpa_onnx } // namespace sherpa_onnx

View File

@@ -6,11 +6,6 @@
#include <memory> #include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
@@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorNeMoModel {
explicit SpeakerEmbeddingExtractorNeMoModel( explicit SpeakerEmbeddingExtractorNeMoModel(
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractorNeMoModel( SpeakerEmbeddingExtractorNeMoModel(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); Manager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
~SpeakerEmbeddingExtractorNeMoModel(); ~SpeakerEmbeddingExtractorNeMoModel();

View File

@@ -6,6 +6,15 @@
#include <vector> #include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/file-utils.h" #include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"
@@ -55,11 +64,10 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
const SpeakerEmbeddingExtractorConfig &config) const SpeakerEmbeddingExtractorConfig &config)
: impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {}
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
: impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {} : impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {}
#endif
SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default;
@@ -77,4 +85,14 @@ std::vector<float> SpeakerEmbeddingExtractor::Compute(OnlineStream *s) const {
return impl_->Compute(s); return impl_->Compute(s);
} }
#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
#if __OHOS__
template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif
} // namespace sherpa_onnx } // namespace sherpa_onnx

View File

@@ -9,11 +9,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/online-stream.h" #include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h" #include "sherpa-onnx/csrc/parse-options.h"
@@ -45,10 +40,9 @@ class SpeakerEmbeddingExtractor {
explicit SpeakerEmbeddingExtractor( explicit SpeakerEmbeddingExtractor(
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#if __ANDROID_API__ >= 9 template <typename Manager>
SpeakerEmbeddingExtractor(AAssetManager *mgr, SpeakerEmbeddingExtractor(Manager *mgr,
const SpeakerEmbeddingExtractorConfig &config); const SpeakerEmbeddingExtractorConfig &config);
#endif
~SpeakerEmbeddingExtractor(); ~SpeakerEmbeddingExtractor();