Add Java API for speaker identification (#822)

This commit is contained in:
Fangjun Kuang
2024-04-29 21:23:56 +08:00
committed by GitHub
parent 88202f05bb
commit cff207623e
13 changed files with 388 additions and 5 deletions

View File

@@ -14,7 +14,7 @@ public class AudioTaggingConfig {
}
public static Builder builder() {
return new AudioTaggingConfig.Builder();
return new Builder();
}
public static class Builder {

View File

@@ -7,7 +7,7 @@ public class OfflineRecognizer {
System.loadLibrary("sherpa-onnx-jni");
}
private long ptr = 0; // this is the asr engine ptrss
private long ptr = 0;
public OfflineRecognizer(OfflineRecognizerConfig config) {
ptr = newFromFile(config);

View File

@@ -7,7 +7,7 @@ public class OfflineTts {
System.loadLibrary("sherpa-onnx-jni");
}
private long ptr = 0; // this is the asr engine ptrss
private long ptr = 0;
public OfflineTts(OfflineTtsConfig config) {
ptr = newFromFile(config);

View File

@@ -8,7 +8,7 @@ public class OnlineRecognizer {
System.loadLibrary("sherpa-onnx-jni");
}
private long ptr = 0; // this is the asr engine ptrss
private long ptr = 0;
public OnlineRecognizer(OnlineRecognizerConfig config) {

View File

@@ -0,0 +1,57 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Java wrapper around the native speaker embedding extractor provided by the
 * {@code sherpa-onnx-jni} library.
 *
 * <p>An instance owns a native handle obtained in the constructor. Call
 * {@link #release()} once the extractor is no longer needed; {@link #finalize()}
 * releases it as a fallback. Every other public method throws
 * {@link IllegalStateException} when the handle is 0 (for example after
 * {@link #release()}), instead of forwarding a zero handle to native code.
 */
public class SpeakerEmbeddingExtractor {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    // Native handle; 0 means "no native object attached".
    private long ptr = 0;

    /**
     * Creates the native extractor from the given configuration.
     *
     * @param config configuration forwarded to the native {@code newFromFile} call
     */
    public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config) {
        ptr = newFromFile(config);
    }

    /**
     * Releases the native handle as a last resort when the object is collected.
     *
     * @throws Throwable whatever {@link #release()} or {@code super.finalize()} throws
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            // Always chain to the superclass finalizer, even if release() fails.
            super.finalize();
        }
    }

    /**
     * Deletes the native object, if any, and resets the handle to 0.
     * Calling this method more than once is a no-op.
     */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Asks the native extractor for a new stream and wraps the returned handle.
     *
     * @return an {@link OnlineStream} built from the native stream handle
     * @throws IllegalStateException if this extractor has no native handle
     */
    public OnlineStream createStream() {
        long p = createStream(handle());
        return new OnlineStream(p);
    }

    /**
     * Delegates to the native {@code isReady} call for the given stream.
     *
     * @param s the stream to query
     * @return the value reported by native code
     * @throws IllegalStateException if this extractor has no native handle
     */
    public boolean isReady(OnlineStream s) {
        return isReady(handle(), s.getPtr());
    }

    /**
     * Delegates to the native {@code compute} call for the given stream.
     *
     * @param s the stream to process
     * @return the float array produced by native code
     * @throws IllegalStateException if this extractor has no native handle
     */
    public float[] compute(OnlineStream s) {
        return compute(handle(), s.getPtr());
    }

    /**
     * Returns the dimension reported by the native extractor.
     *
     * @return the value of the native {@code dim} call
     * @throws IllegalStateException if this extractor has no native handle
     */
    public int getDim() {
        return dim(handle());
    }

    // Returns the native handle, refusing to hand out a zero handle.
    private long handle() {
        if (ptr == 0) {
            throw new IllegalStateException(
                "SpeakerEmbeddingExtractor has no native handle (released or not initialized)");
        }
        return ptr;
    }

    private native void delete(long ptr);

    private native long newFromFile(SpeakerEmbeddingExtractorConfig config);

    private native long createStream(long ptr);

    private native boolean isReady(long ptr, long streamPtr);

    private native float[] compute(long ptr, long streamPtr);

    private native int dim(long ptr);
}

View File

@@ -0,0 +1,54 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Immutable settings for a speaker embedding extractor, created through
 * {@link #builder()}.
 *
 * <p>NOTE(review): the field names are kept as-is because the instance is handed
 * to a native {@code newFromFile} call, which presumably reads them by name.
 * Confirm against the JNI side before renaming.
 */
public class SpeakerEmbeddingExtractorConfig {
    private final String model;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    // Copies every setting out of the builder; only reachable via Builder#build().
    private SpeakerEmbeddingExtractorConfig(Builder source) {
        model = source.model;
        numThreads = source.numThreads;
        debug = source.debug;
        provider = source.provider;
    }

    /**
     * Starts a new configuration with default values.
     *
     * @return a fresh {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Fluent builder for {@link SpeakerEmbeddingExtractorConfig}.
     *
     * <p>Defaults: empty model, one thread, debug enabled, provider {@code "cpu"}.
     */
    public static class Builder {
        private String model = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        /**
         * Sets the model value.
         *
         * @param model the model string to store
         * @return this builder
         */
        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        /**
         * Sets the number of threads.
         *
         * @param numThreads the thread count to store
         * @return this builder
         */
        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        /**
         * Sets the debug flag.
         *
         * @param debug the flag value to store
         * @return this builder
         */
        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        /**
         * Sets the provider name.
         *
         * @param provider the provider string to store
         * @return this builder
         */
        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }

        /**
         * Creates the configuration from the values currently held by this builder.
         *
         * @return a new {@link SpeakerEmbeddingExtractorConfig}
         */
        public SpeakerEmbeddingExtractorConfig build() {
            final SpeakerEmbeddingExtractorConfig config = new SpeakerEmbeddingExtractorConfig(this);
            return config;
        }
    }
}

View File

@@ -0,0 +1,80 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Java wrapper around the native speaker embedding manager provided by the
 * {@code sherpa-onnx-jni} library.
 *
 * <p>An instance owns a native handle obtained in the constructor. Call
 * {@link #release()} once the manager is no longer needed; {@link #finalize()}
 * releases it as a fallback. Every other public method throws
 * {@link IllegalStateException} when the handle is 0 (for example after
 * {@link #release()}), instead of forwarding a zero handle to native code.
 */
public class SpeakerEmbeddingManager {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    // Native handle; 0 means "no native object attached".
    private long ptr = 0;

    /**
     * Creates the native manager.
     *
     * @param dim value forwarded to the native {@code create} call
     */
    public SpeakerEmbeddingManager(int dim) {
        ptr = create(dim);
    }

    /**
     * Releases the native handle as a last resort when the object is collected.
     *
     * @throws Throwable whatever {@link #release()} or {@code super.finalize()} throws
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            // Always chain to the superclass finalizer, even if release() fails.
            super.finalize();
        }
    }

    /**
     * Deletes the native object, if any, and resets the handle to 0.
     * Calling this method more than once is a no-op.
     */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Delegates to the native {@code add} call with a single embedding.
     *
     * @param name speaker name passed to native code
     * @param embedding embedding passed to native code
     * @return the value reported by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean add(String name, float[] embedding) {
        return add(handle(), name, embedding);
    }

    /**
     * Delegates to the native {@code addList} call with several embeddings.
     *
     * @param name speaker name passed to native code
     * @param embedding array of embeddings passed to native code
     * @return the value reported by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean add(String name, float[][] embedding) {
        return addList(handle(), name, embedding);
    }

    /**
     * Delegates to the native {@code remove} call.
     *
     * @param name speaker name passed to native code
     * @return the value reported by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean remove(String name) {
        return remove(handle(), name);
    }

    /**
     * Delegates to the native {@code search} call.
     *
     * @param embedding embedding passed to native code
     * @param threshold threshold passed to native code
     * @return the string returned by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public String search(float[] embedding, float threshold) {
        return search(handle(), embedding, threshold);
    }

    /**
     * Delegates to the native {@code verify} call.
     *
     * @param name speaker name passed to native code
     * @param embedding embedding passed to native code
     * @param threshold threshold passed to native code
     * @return the value reported by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean verify(String name, float[] embedding, float threshold) {
        return verify(handle(), name, embedding, threshold);
    }

    /**
     * Delegates to the native {@code contains} call.
     *
     * @param name speaker name passed to native code
     * @return the value reported by native code
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean contains(String name) {
        return contains(handle(), name);
    }

    /**
     * Returns the speaker count reported by the native manager.
     *
     * @return the value of the native {@code numSpeakers} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public int getNumSpeakers() {
        return numSpeakers(handle());
    }

    /**
     * Returns the speaker names reported by the native manager.
     *
     * @return the array returned by the native {@code allSpeakerNames} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public String[] getAllSpeakerNames() {
        return allSpeakerNames(handle());
    }

    // Returns the native handle, refusing to hand out a zero handle.
    private long handle() {
        if (ptr == 0) {
            throw new IllegalStateException(
                "SpeakerEmbeddingManager has no native handle (released or not initialized)");
        }
        return ptr;
    }

    private native long create(int dim);

    private native void delete(long ptr);

    private native boolean add(long ptr, String name, float[] embedding);

    private native boolean addList(long ptr, String name, float[][] embedding);

    private native boolean remove(long ptr, String name);

    private native String search(long ptr, float[] embedding, float threshold);

    private native boolean verify(long ptr, String name, float[] embedding, float threshold);

    private native boolean contains(long ptr, String name);

    private native int numSpeakers(long ptr);

    private native String[] allSpeakerNames(long ptr);
}

View File

@@ -12,7 +12,7 @@ public class SpokenLanguageIdentification {
}
private final Map<String, String> localeMap;
private long ptr = 0; // this is the asr engine ptrss
private long ptr = 0;
public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) {
ptr = newFromFile(config);