Add speaker identification APIs for HarmonyOS (#1607)
* Add speaker embedding extractor API for HarmonyOS
* Add ArkTS API for speaker identification
This commit is contained in:
@@ -51,3 +51,9 @@ export {
|
||||
TtsOutput,
|
||||
TtsInput,
|
||||
} from './src/main/ets/components/NonStreamingTts';
|
||||
|
||||
export {
|
||||
SpeakerEmbeddingExtractorConfig,
|
||||
SpeakerEmbeddingExtractor,
|
||||
SpeakerEmbeddingManager,
|
||||
} from './src/main/ets/components/SpeakerIdentification';
|
||||
|
||||
@@ -11,6 +11,17 @@
|
||||
static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
|
||||
CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
|
||||
#if __OHOS__
|
||||
if (info.Length() != 2) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 2 arguments. Given: " << info.Length();
|
||||
|
||||
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
|
||||
|
||||
return {};
|
||||
}
|
||||
#else
|
||||
if (info.Length() != 1) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 1 argument. Given: " << info.Length();
|
||||
@@ -19,6 +30,7 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
return {};
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!info[0].IsObject()) {
|
||||
Napi::TypeError::New(env, "You should pass an object as the only argument.")
|
||||
@@ -46,8 +58,18 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
|
||||
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
|
||||
&OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
|
||||
const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
|
||||
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(&c, mgr.get());
|
||||
#else
|
||||
const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
|
||||
SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
|
||||
#endif
|
||||
|
||||
if (c.model) {
|
||||
delete[] c.model;
|
||||
|
||||
@@ -47,3 +47,18 @@ export type TtsOutput = {
|
||||
|
||||
export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
|
||||
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
|
||||
|
||||
// ---- Speaker embedding extractor bindings ----
// `config` is read by the native wrapper as an object. `mgr` is declared
// optional here, but the OHOS build of the native wrapper throws a TypeError
// unless exactly 2 arguments are passed.
export const createSpeakerEmbeddingExtractor: (config: object, mgr?: object) => object;
export const speakerEmbeddingExtractorDim: (handle: object) => number;
export const speakerEmbeddingExtractorCreateStream: (handle: object) => object;
export const speakerEmbeddingExtractorIsReady: (handle: object, stream: object) => boolean;
export const speakerEmbeddingExtractorComputeEmbedding: (handle: object, stream: object, enableExternalBuffer: boolean) => Float32Array;

// ---- Speaker embedding manager bindings ----
// Every function below takes the opaque handle returned by
// `createSpeakerEmbeddingManager` as its first argument.
export const createSpeakerEmbeddingManager: (dim: number) => object;
export const speakerEmbeddingManagerAdd: (handle: object, speaker: {name: string, v: Float32Array}) => boolean;
// `vv` holds several embeddings in one flat array; `n` is how many it contains.
export const speakerEmbeddingManagerAddListFlattened: (handle: object, speaker: {name: string, vv: Float32Array, n: number}) => boolean;
export const speakerEmbeddingManagerRemove: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerSearch: (handle: object, obj: {v: Float32Array, threshold: number}) => string;
export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, v: Float32Array, threshold: number}) => boolean;
export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
getOfflineTtsSampleRate,
|
||||
offlineTtsGenerate,
|
||||
offlineTtsGenerateAsync,
|
||||
} from "libsherpa_onnx.so";
|
||||
} from 'libsherpa_onnx.so';
|
||||
|
||||
export class OfflineTtsVitsModelConfig {
|
||||
public model: string = '';
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
import {
|
||||
createSpeakerEmbeddingExtractor,
|
||||
createSpeakerEmbeddingManager,
|
||||
speakerEmbeddingExtractorComputeEmbedding,
|
||||
speakerEmbeddingExtractorCreateStream,
|
||||
speakerEmbeddingExtractorDim,
|
||||
speakerEmbeddingExtractorIsReady,
|
||||
speakerEmbeddingManagerAdd,
|
||||
speakerEmbeddingManagerAddListFlattened,
|
||||
speakerEmbeddingManagerContains,
|
||||
speakerEmbeddingManagerGetAllSpeakers,
|
||||
speakerEmbeddingManagerNumSpeakers,
|
||||
speakerEmbeddingManagerRemove,
|
||||
speakerEmbeddingManagerSearch,
|
||||
speakerEmbeddingManagerVerify
|
||||
} from 'libsherpa_onnx.so';
|
||||
import { OnlineStream } from './StreamingAsr';
|
||||
|
||||
/**
 * Settings used to build a SpeakerEmbeddingExtractor.
 *
 * An instance is handed as-is to the native `createSpeakerEmbeddingExtractor`
 * binding, so the field names are part of the native contract.
 */
export class SpeakerEmbeddingExtractorConfig {
  /** Model setting; empty by default. NOTE(review): presumably the model file path - confirm. */
  public model: string = '';

  /** Thread count setting; defaults to 1. */
  public numThreads: number = 1;

  /** Debug flag; off by default. */
  public debug: boolean = false;

  /** Provider name; defaults to 'cpu'. */
  public provider: string = 'cpu';
}
|
||||
|
||||
/**
 * Thin wrapper around the native speaker embedding extractor bindings.
 *
 * The native object is created in the constructor and kept as an opaque
 * handle; every method forwards to the matching native function.
 */
export class SpeakerEmbeddingExtractor {
  /** The configuration this extractor was created with. */
  public config: SpeakerEmbeddingExtractorConfig;

  /** Embedding dimension reported by the native extractor at construction time. */
  public dim: number;

  /** Opaque native handle returned by `createSpeakerEmbeddingExtractor`. */
  private readonly handle: object;

  /**
   * @param config Extractor settings, forwarded unchanged to the native binding.
   * @param mgr Optional resource manager object, forwarded unchanged to the
   *   native binding as its second argument.
   */
  constructor(config: SpeakerEmbeddingExtractorConfig, mgr?: object) {
    this.handle = createSpeakerEmbeddingExtractor(config, mgr);
    this.config = config;
    this.dim = speakerEmbeddingExtractorDim(this.handle);
  }

  /**
   * Creates a new stream bound to this extractor.
   *
   * @returns An OnlineStream wrapping the native stream handle.
   */
  createStream(): OnlineStream {
    const streamHandle = speakerEmbeddingExtractorCreateStream(this.handle);
    return new OnlineStream(streamHandle);
  }

  /**
   * Asks the native extractor whether `stream` is ready.
   *
   * @param stream A stream whose native handle is passed to the binding.
   * @returns The boolean reported by the native binding.
   */
  isReady(stream: OnlineStream): boolean {
    return speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
  }

  /**
   * Computes the embedding for `stream`.
   *
   * @param stream A stream whose native handle is passed to the binding.
   * @param enableExternalBuffer Forwarded unchanged to the native binding;
   *   defaults to true.
   * @returns The embedding returned by the native binding.
   */
  compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
    return speakerEmbeddingExtractorComputeEmbedding(
      this.handle, stream.handle, enableExternalBuffer);
  }
}
|
||||
|
||||
/**
 * Concatenates a list of Float32Array values into one contiguous Float32Array.
 *
 * @param arrayList Arrays to join, in order. May be empty, and may contain
 *   empty arrays.
 * @returns A new array whose length is the sum of the input lengths, holding
 *   the inputs back to back. The inputs are not modified.
 */
function flatten(arrayList: Float32Array[]): Float32Array {
  // First pass: size the output so it is allocated exactly once.
  let total = 0;
  for (const chunk of arrayList) {
    total += chunk.length;
  }

  const ans = new Float32Array(total);

  // Second pass: copy each input at its running offset.
  let offset = 0;
  for (const chunk of arrayList) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
|
||||
|
||||
/** Argument of `SpeakerEmbeddingManager.add`: one speaker name with one embedding. */
interface SpeakerNameWithEmbedding {
  /** Speaker name. */
  name: string;

  /** A single embedding. NOTE(review): presumably of length `dim` - confirm. */
  v: Float32Array;
}
|
||||
|
||||
/** Argument of `SpeakerEmbeddingManager.addMulti`: one speaker name with several embeddings. */
interface SpeakerNameWithEmbeddingList {
  /** Speaker name. */
  name: string;

  /** One entry per embedding; the list is flattened before it reaches the native binding. */
  v: Float32Array[];
}
|
||||
|
||||
/**
 * Payload handed to the native `speakerEmbeddingManagerAddListFlattened`
 * binding; built by `SpeakerEmbeddingManager.addMulti`.
 */
interface SpeakerNameWithEmbeddingN {
  /** Speaker name. */
  name: string;

  /** All embeddings concatenated into one flat array. */
  vv: Float32Array;

  /** Number of embeddings stored in `vv`. */
  n: number;
}
|
||||
|
||||
/** Argument of `SpeakerEmbeddingManager.search`. */
interface EmbeddingWithThreshold {
  /** The query embedding. */
  v: Float32Array;

  /** Threshold forwarded to the native search. NOTE(review): presumably a minimum similarity score - confirm. */
  threshold: number;
}
|
||||
|
||||
/** Argument of `SpeakerEmbeddingManager.verify`. */
interface SpeakerNameEmbeddingThreshold {
  /** Name of the speaker to verify against. */
  name: string;

  /** The embedding to check. */
  v: Float32Array;

  /** Threshold forwarded to the native verify call. NOTE(review): presumably a minimum similarity score - confirm. */
  threshold: number;
}
|
||||
|
||||
/**
 * Thin wrapper around the native speaker embedding manager bindings.
 *
 * The native object is created in the constructor and kept as an opaque
 * handle; every method forwards to the matching native function and returns
 * its result unchanged.
 */
export class SpeakerEmbeddingManager {
  /** The embedding dimension this manager was created with. */
  public dim: number;

  /** Opaque native handle returned by `createSpeakerEmbeddingManager`. */
  private readonly handle: object;

  /**
   * @param dim Embedding dimension, forwarded to the native binding.
   */
  constructor(dim: number) {
    this.handle = createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  /**
   * Registers one embedding under a speaker name.
   *
   * @returns The boolean reported by the native binding.
   */
  add(speaker: SpeakerNameWithEmbedding): boolean {
    return speakerEmbeddingManagerAdd(this.handle, speaker);
  }

  /**
   * Registers several embeddings under one speaker name.
   *
   * The embeddings are concatenated into a single flat array and passed to the
   * native binding together with their count.
   *
   * @returns The boolean reported by the native binding.
   */
  addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
    const payload: SpeakerNameWithEmbeddingN = {
      name: speaker.name,
      vv: flatten(speaker.v),
      n: speaker.v.length,
    };
    return speakerEmbeddingManagerAddListFlattened(this.handle, payload);
  }

  /**
   * Removes a speaker by name.
   *
   * @returns The boolean reported by the native binding.
   */
  remove(name: string): boolean {
    return speakerEmbeddingManagerRemove(this.handle, name);
  }

  /**
   * Searches for a speaker matching the given embedding and threshold.
   *
   * @returns The string returned by the native binding.
   *   NOTE(review): the value returned when nothing matches is not visible here - confirm.
   */
  search(obj: EmbeddingWithThreshold): string {
    return speakerEmbeddingManagerSearch(this.handle, obj);
  }

  /**
   * Checks the given embedding against the named speaker using the threshold.
   *
   * @returns The boolean reported by the native binding.
   */
  verify(obj: SpeakerNameEmbeddingThreshold): boolean {
    return speakerEmbeddingManagerVerify(this.handle, obj);
  }

  /**
   * @returns The boolean reported by the native binding for `name`.
   */
  contains(name: string): boolean {
    return speakerEmbeddingManagerContains(this.handle, name);
  }

  /**
   * @returns The speaker count reported by the native binding.
   */
  getNumSpeakers(): number {
    return speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  /**
   * @returns The speaker names returned by the native binding.
   */
  getAllSpeakerNames(): string[] {
    return speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}
|
||||
Reference in New Issue
Block a user