Add Java API for speaker identification (#822)
This commit is contained in:
@@ -14,7 +14,7 @@ public class AudioTaggingConfig {
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new AudioTaggingConfig.Builder();
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
@@ -7,7 +7,7 @@ public class OfflineRecognizer {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
private long ptr = 0;
|
||||
|
||||
public OfflineRecognizer(OfflineRecognizerConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
|
||||
@@ -7,7 +7,7 @@ public class OfflineTts {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
private long ptr = 0;
|
||||
|
||||
public OfflineTts(OfflineTtsConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
|
||||
@@ -8,7 +8,7 @@ public class OnlineRecognizer {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
private long ptr = 0;
|
||||
|
||||
|
||||
public OnlineRecognizer(OnlineRecognizerConfig config) {
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class SpeakerEmbeddingExtractor {
|
||||
static {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0;
|
||||
|
||||
public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finalize() throws Throwable {
|
||||
release();
|
||||
}
|
||||
|
||||
public void release() {
|
||||
if (this.ptr == 0) {
|
||||
return;
|
||||
}
|
||||
delete(this.ptr);
|
||||
this.ptr = 0;
|
||||
}
|
||||
|
||||
public OnlineStream createStream() {
|
||||
long p = createStream(ptr);
|
||||
return new OnlineStream(p);
|
||||
}
|
||||
|
||||
public boolean isReady(OnlineStream s) {
|
||||
return isReady(ptr, s.getPtr());
|
||||
}
|
||||
|
||||
public float[] compute(OnlineStream s) {
|
||||
return compute(ptr, s.getPtr());
|
||||
}
|
||||
|
||||
public int getDim() {
|
||||
return dim(ptr);
|
||||
}
|
||||
|
||||
private native void delete(long ptr);
|
||||
|
||||
private native long newFromFile(SpeakerEmbeddingExtractorConfig config);
|
||||
|
||||
private native long createStream(long ptr);
|
||||
|
||||
private native boolean isReady(long ptr, long streamPtr);
|
||||
|
||||
private native float[] compute(long ptr, long streamPtr);
|
||||
|
||||
private native int dim(long ptr);
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Immutable settings for a speaker embedding extractor, created through
 * {@link #builder()}.
 *
 * <p>The fields have no getters. NOTE(review): they are presumably read by
 * name from native code, so their names and types should not be changed
 * without checking the JNI side.
 */
public class SpeakerEmbeddingExtractorConfig {
    private final String model;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    // Copies the builder's current values; instances are created only via Builder.build().
    private SpeakerEmbeddingExtractorConfig(Builder builder) {
        this.model = builder.model;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    /**
     * Starts a new configuration with the default values.
     *
     * @return a fresh builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Fluent builder for {@link SpeakerEmbeddingExtractorConfig}.
     *
     * <p>Defaults: empty model string, 1 thread, debug enabled, provider
     * {@code "cpu"}.
     */
    public static class Builder {
        private String model = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        /**
         * Creates the configuration from the values set so far.
         *
         * @return a new immutable configuration
         */
        public SpeakerEmbeddingExtractorConfig build() {
            return new SpeakerEmbeddingExtractorConfig(this);
        }

        /**
         * Sets the model string.
         *
         * @param model the model value; must not be {@code null}
         * @return this builder
         * @throws NullPointerException if {@code model} is {@code null}
         */
        public Builder setModel(String model) {
            // Fail here with a clear message instead of storing a null string in
            // a config that is later passed to native code.
            this.model = java.util.Objects.requireNonNull(model, "model");
            return this;
        }

        /**
         * Sets the thread count.
         *
         * @param numThreads the number of threads
         * @return this builder
         */
        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        /**
         * Sets the debug flag.
         *
         * @param debug the new flag value
         * @return this builder
         */
        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        /**
         * Sets the provider string.
         *
         * @param provider the provider name; must not be {@code null}
         * @return this builder
         * @throws NullPointerException if {@code provider} is {@code null}
         */
        public Builder setProvider(String provider) {
            // Same reasoning as setModel: reject null at the call site.
            this.provider = java.util.Objects.requireNonNull(provider, "provider");
            return this;
        }
    }
}
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Java-side handle for a speaker embedding manager implemented in the
 * {@code sherpa-onnx-jni} native library.
 *
 * <p>The instance stores an opaque native handle returned by {@code create}.
 * Call {@link #release()} once the manager is no longer needed;
 * {@link #finalize()} calls it as a fallback. After the handle has been
 * released, every method that needs it throws {@link IllegalStateException}
 * rather than passing a zero handle to native code.
 */
public class SpeakerEmbeddingManager {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    // Opaque native handle; 0 means "no native object".
    private long ptr = 0;

    /**
     * Creates the native manager.
     *
     * @param dim value passed to the native {@code create} call (presumably
     *     the embedding dimension; confirm against the native implementation)
     */
    public SpeakerEmbeddingManager(int dim) {
        ptr = create(dim);
    }

    /** Releases the native handle if the caller did not do so explicitly. */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            // Keep the finalizer chain intact even if release() throws.
            super.finalize();
        }
    }

    /**
     * Deletes the native object and clears the handle. Calling this more than
     * once is harmless: later calls return immediately.
     */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Registers one embedding under the given name.
     *
     * @param name speaker name
     * @param embedding the embedding to store
     * @return the status reported by the native {@code add} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean add(String name, float[] embedding) {
        return add(nativeHandle(), name, embedding);
    }

    /**
     * Registers several embeddings under the given name.
     *
     * @param name speaker name
     * @param embedding the embeddings to store, one per row
     * @return the status reported by the native {@code addList} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean add(String name, float[][] embedding) {
        return addList(nativeHandle(), name, embedding);
    }

    /**
     * Removes the entry stored under the given name.
     *
     * @param name speaker name
     * @return the status reported by the native {@code remove} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean remove(String name) {
        return remove(nativeHandle(), name);
    }

    /**
     * Looks up a speaker for the given embedding.
     *
     * @param embedding the query embedding
     * @param threshold threshold forwarded to native code
     * @return the string returned by the native {@code search} call
     *     (NOTE(review): the value returned when nothing matches is not
     *     visible here; confirm against the native implementation)
     * @throws IllegalStateException if this manager has no native handle
     */
    public String search(float[] embedding, float threshold) {
        return search(nativeHandle(), embedding, threshold);
    }

    /**
     * Checks the given embedding against the named speaker.
     *
     * @param name speaker name
     * @param embedding the embedding to check
     * @param threshold threshold forwarded to native code
     * @return the result of the native {@code verify} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean verify(String name, float[] embedding, float threshold) {
        return verify(nativeHandle(), name, embedding, threshold);
    }

    /**
     * Reports whether a speaker with the given name is known.
     *
     * @param name speaker name
     * @return the result of the native {@code contains} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public boolean contains(String name) {
        return contains(nativeHandle(), name);
    }

    /**
     * Returns the speaker count reported by native code.
     *
     * @return the result of the native {@code numSpeakers} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public int getNumSpeakers() {
        return numSpeakers(nativeHandle());
    }

    /**
     * Returns the speaker names reported by native code.
     *
     * @return the result of the native {@code allSpeakerNames} call
     * @throws IllegalStateException if this manager has no native handle
     */
    public String[] getAllSpeakerNames() {
        return allSpeakerNames(nativeHandle());
    }

    // Returns the native handle, refusing to hand out 0 (released, or never created).
    private long nativeHandle() {
        if (ptr == 0) {
            throw new IllegalStateException(
                    "SpeakerEmbeddingManager has no native handle (released or not initialized)");
        }
        return ptr;
    }

    private native long create(int dim);

    private native void delete(long ptr);

    private native boolean add(long ptr, String name, float[] embedding);

    private native boolean addList(long ptr, String name, float[][] embedding);

    private native boolean remove(long ptr, String name);

    private native String search(long ptr, float[] embedding, float threshold);

    private native boolean verify(long ptr, String name, float[] embedding, float threshold);

    private native boolean contains(long ptr, String name);

    private native int numSpeakers(long ptr);

    private native String[] allSpeakerNames(long ptr);
}
|
||||
@@ -12,7 +12,7 @@ public class SpokenLanguageIdentification {
|
||||
}
|
||||
|
||||
private final Map<String, String> localeMap;
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
private long ptr = 0;
|
||||
|
||||
public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
|
||||
Reference in New Issue
Block a user