Add VAD demo for Java API (#928)
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Immutable settings for a Silero VAD model, created through {@link Builder}.
 *
 * <p>NOTE(review): instances travel inside a {@code VadModelConfig} that is handed to native
 * code; the native side may look these fields up by name, so confirm before renaming any of them.
 */
public class SileroVadModelConfig {
    private final String model;
    private final float threshold;
    private final float minSilenceDuration;
    private final float minSpeechDuration;
    private final int windowSize;

    /** Copies every setting out of the builder; instances are only created via {@link Builder#build()}. */
    private SileroVadModelConfig(Builder builder) {
        this.model = builder.model;
        this.threshold = builder.threshold;
        this.minSilenceDuration = builder.minSilenceDuration;
        this.minSpeechDuration = builder.minSpeechDuration;
        this.windowSize = builder.windowSize;
    }

    /**
     * Creates a builder pre-populated with the default settings.
     *
     * @return a fresh {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * @return the model setting (presumably a path to the model file - confirm against the caller)
     */
    public String getModel() {
        return model;
    }

    /**
     * @return the configured threshold
     */
    public float getThreshold() {
        return threshold;
    }

    /**
     * @return the minimum silence duration (unit not visible here; presumably seconds - confirm)
     */
    public float getMinSilenceDuration() {
        return minSilenceDuration;
    }

    /**
     * @return the minimum speech duration (unit not visible here; presumably seconds - confirm)
     */
    public float getMinSpeechDuration() {
        return minSpeechDuration;
    }

    /**
     * @return the window size (presumably counted in samples - confirm)
     */
    public int getWindowSize() {
        return windowSize;
    }

    /**
     * Fluent builder for {@link SileroVadModelConfig}.
     *
     * <p>Defaults: model {@code ""}, threshold {@code 0.5f}, minSilenceDuration {@code 0.25f},
     * minSpeechDuration {@code 0.5f}, windowSize {@code 512}.
     */
    public static class Builder {
        private String model = "";
        private float threshold = 0.5f;
        private float minSilenceDuration = 0.25f;
        private float minSpeechDuration = 0.5f;
        private int windowSize = 512;

        /**
         * @return a new configuration holding the values currently set on this builder
         */
        public SileroVadModelConfig build() {
            return new SileroVadModelConfig(this);
        }

        /**
         * @param model the model setting to use
         * @return this builder, for chaining
         */
        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        /**
         * @param threshold the threshold to use
         * @return this builder, for chaining
         */
        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }

        /**
         * @param minSilenceDuration the minimum silence duration to use
         * @return this builder, for chaining
         */
        public Builder setMinSilenceDuration(float minSilenceDuration) {
            this.minSilenceDuration = minSilenceDuration;
            return this;
        }

        /**
         * @param minSpeechDuration the minimum speech duration to use
         * @return this builder, for chaining
         */
        public Builder setMinSpeechDuration(float minSpeechDuration) {
            this.minSpeechDuration = minSpeechDuration;
            return this;
        }

        /**
         * @param windowSize the window size to use
         * @return this builder, for chaining
         */
        public Builder setWindowSize(int windowSize) {
            this.windowSize = windowSize;
            return this;
        }
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * A start position paired with the audio samples of one segment.
 *
 * <p>The sample array is stored and returned by reference (no copy is made), so changes made to
 * the array by the caller are visible through this object and vice versa.
 */
public class SpeechSegment {

    private final int start;
    private final float[] samples;

    /**
     * @param start the segment's start value (presumably a sample index - confirm with the producer)
     * @param samples the segment's samples; kept by reference, not copied
     */
    public SpeechSegment(int start, float[] samples) {
        this.start = start;
        this.samples = samples;
    }

    /**
     * @return the start value given at construction
     */
    public int getStart() {
        return start;
    }

    /**
     * @return the same array instance that was given at construction
     */
    public float[] getSamples() {
        return samples;
    }
}
|
||||
78
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
Normal file
78
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/Vad.java
Normal file
@@ -0,0 +1,78 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class Vad {
|
||||
static {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0;
|
||||
|
||||
public Vad(VadModelConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finalize() throws Throwable {
|
||||
release();
|
||||
}
|
||||
|
||||
public void release() {
|
||||
if (this.ptr == 0) {
|
||||
return;
|
||||
}
|
||||
delete(this.ptr);
|
||||
this.ptr = 0;
|
||||
}
|
||||
|
||||
public void acceptWaveform(float[] samples) {
|
||||
acceptWaveform(this.ptr, samples);
|
||||
}
|
||||
|
||||
public boolean empty() {
|
||||
return empty(this.ptr);
|
||||
}
|
||||
|
||||
public void pop() {
|
||||
pop(this.ptr);
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
clear(this.ptr);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reset(this.ptr);
|
||||
}
|
||||
|
||||
public SpeechSegment front() {
|
||||
Object[] arr = front(this.ptr);
|
||||
int start = (int) arr[0];
|
||||
float[] samples = (float[]) arr[1];
|
||||
|
||||
return new SpeechSegment(start, samples);
|
||||
}
|
||||
|
||||
public boolean isSpeechDetected() {
|
||||
return isSpeechDetected(this.ptr);
|
||||
}
|
||||
|
||||
private native void delete(long ptr);
|
||||
|
||||
private native long newFromFile(VadModelConfig config);
|
||||
|
||||
private native void acceptWaveform(long ptr, float[] samples);
|
||||
|
||||
private native boolean empty(long ptr);
|
||||
|
||||
private native void pop(long ptr);
|
||||
|
||||
private native void clear(long ptr);
|
||||
|
||||
private native Object[] front(long ptr);
|
||||
|
||||
private native boolean isSpeechDetected(long ptr);
|
||||
|
||||
private native void reset(long ptr);
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class VadModelConfig {
|
||||
private final SileroVadModelConfig sileroVadModelConfig;
|
||||
private final int sampleRate;
|
||||
private final int numThreads;
|
||||
private final boolean debug;
|
||||
private final String provider;
|
||||
|
||||
private VadModelConfig(Builder builder) {
|
||||
this.sileroVadModelConfig = builder.sileroVadModelConfig;
|
||||
this.sampleRate = builder.sampleRate;
|
||||
this.numThreads = builder.numThreads;
|
||||
this.debug = builder.debug;
|
||||
this.provider = builder.provider;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public SileroVadModelConfig getSileroVadModelConfig() {
|
||||
return sileroVadModelConfig;
|
||||
}
|
||||
|
||||
public int getSampleRate() {
|
||||
return sampleRate;
|
||||
}
|
||||
|
||||
public int getNumThreads() {
|
||||
return numThreads;
|
||||
}
|
||||
|
||||
public String getProvider() {
|
||||
return provider;
|
||||
}
|
||||
|
||||
public boolean getDebug() {
|
||||
return debug;
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build();
|
||||
private int sampleRate = 16000;
|
||||
private int numThreads = 1;
|
||||
private boolean debug = true;
|
||||
private String provider = "cpu";
|
||||
|
||||
public VadModelConfig build() {
|
||||
return new VadModelConfig(this);
|
||||
}
|
||||
|
||||
public Builder setSileroVadModelConfig(SileroVadModelConfig sileroVadModelConfig) {
|
||||
this.sileroVadModelConfig = sileroVadModelConfig;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setSampleRate(int sampleRate) {
|
||||
this.sampleRate = sampleRate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setNumThreads(int numThreads) {
|
||||
this.numThreads = numThreads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setDebug(boolean debug) {
|
||||
this.debug = debug;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setProvider(String provider) {
|
||||
this.provider = provider;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Writes audio samples to a file through a native implementation.
 */
public class WaveWriter {
    // This class declares a native method, so make sure the JNI library is loaded even when
    // WaveWriter is the first class touched. Repeated loadLibrary calls for the same library
    // are ignored by the JVM.
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    public WaveWriter() {
    }

    /**
     * Writes the samples to the named file via native code.
     *
     * @param filename the output file name passed to native code
     * @param samples the samples to write
     * @param sampleRate the sample rate passed to native code
     * @return the boolean reported by the native writer (presumably {@code true} on success -
     *     confirm against the native implementation)
     */
    public static boolean write(String filename, float[] samples, int sampleRate) {
        // The native method is an instance method, so a throwaway instance is needed to call it.
        WaveWriter w = new WaveWriter();
        return w.writeWaveToFile(filename, samples, sampleRate);
    }

    private native boolean writeWaveToFile(String filename, float[] samples, int sampleRate);
}
|
||||
Reference in New Issue
Block a user