Add VAD demo for Java API (#928)

This commit is contained in:
Fangjun Kuang
2024-05-28 14:59:47 +08:00
committed by GitHub
parent b1c7d04ce2
commit bcaa6df389
14 changed files with 604 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Immutable settings for the Silero VAD model.
 *
 * <p>Instances are created through {@link #builder()}; every value is copied
 * out of the {@link Builder} once, at {@link Builder#build()} time.
 */
public class SileroVadModelConfig {
    // NOTE(review): field names are presumably looked up by name from native
    // code - confirm before renaming any of them.
    private final String model;
    private final float threshold;
    private final float minSilenceDuration;
    private final float minSpeechDuration;
    private final int windowSize;

    /** Snapshots the current state of {@code source}. */
    private SileroVadModelConfig(Builder source) {
        model = source.model;
        threshold = source.threshold;
        minSilenceDuration = source.minSilenceDuration;
        minSpeechDuration = source.minSpeechDuration;
        windowSize = source.windowSize;
    }

    /** @return a fresh builder pre-populated with the default values */
    public static Builder builder() {
        return new Builder();
    }

    /** @return the configured model string (empty by default) */
    public String getModel() {
        return this.model;
    }

    /** @return the configured threshold (0.5 by default) */
    public float getThreshold() {
        return this.threshold;
    }

    /** @return the configured minimum silence duration (0.25 by default) */
    public float getMinSilenceDuration() {
        return this.minSilenceDuration;
    }

    /** @return the configured minimum speech duration (0.5 by default) */
    public float getMinSpeechDuration() {
        return this.minSpeechDuration;
    }

    /** @return the configured window size (512 by default) */
    public int getWindowSize() {
        return this.windowSize;
    }

    /**
     * Fluent, mutable builder for {@link SileroVadModelConfig}. Each setter
     * returns the same builder so that calls can be chained.
     */
    public static class Builder {
        private String model = "";
        private float threshold = 0.5f;
        private float minSilenceDuration = 0.25f;
        private float minSpeechDuration = 0.5f;
        private int windowSize = 512;

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }

        public Builder setMinSilenceDuration(float minSilenceDuration) {
            this.minSilenceDuration = minSilenceDuration;
            return this;
        }

        public Builder setMinSpeechDuration(float minSpeechDuration) {
            this.minSpeechDuration = minSpeechDuration;
            return this;
        }

        public Builder setWindowSize(int windowSize) {
            this.windowSize = windowSize;
            return this;
        }

        /** @return a new immutable config holding this builder's current values */
        public SileroVadModelConfig build() {
            return new SileroVadModelConfig(this);
        }
    }
}

View File

@@ -0,0 +1,20 @@
package com.k2fsa.sherpa.onnx;
/**
 * Pairs a start value with an array of samples.
 *
 * <p>The sample array is stored and returned by reference; no copy is made
 * on the way in or on the way out.
 */
public class SpeechSegment {
    private final float[] samples;
    private final int start;

    /**
     * @param start the start value of this segment
     * @param samples the samples of this segment (kept by reference)
     */
    public SpeechSegment(int start, float[] samples) {
        this.samples = samples;
        this.start = start;
    }

    /** @return the very same array instance that was given to the constructor */
    public float[] getSamples() {
        return this.samples;
    }

    /** @return the start value given to the constructor */
    public int getStart() {
        return this.start;
    }
}

View File

@@ -0,0 +1,78 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Java wrapper around a native object that lives in the
 * {@code sherpa-onnx-jni} library.
 *
 * <p>The native object is created in the constructor and destroyed by
 * {@link #release()} (also invoked from {@link #finalize()}). Every other
 * public method forwards to a native method and throws
 * {@link IllegalStateException} when no native object is attached, rather
 * than handing a zero handle to native code.
 */
public class Vad {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    // Opaque handle produced by newFromFile(); 0 means "no native object".
    private long ptr = 0;

    /**
     * Creates the native object from {@code config}.
     *
     * @param config configuration handed to the native constructor
     */
    public Vad(VadModelConfig config) {
        ptr = newFromFile(config);
    }

    /** Releases the native object if the caller never called {@link #release()}. */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            super.finalize();
        }
    }

    /**
     * Destroys the native object. Calling this more than once is harmless:
     * the handle is zeroed after the first call and later calls return early.
     */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Forwards {@code samples} to the native object.
     *
     * @param samples audio samples to pass to native code
     * @throws IllegalStateException if no native object is attached
     */
    public void acceptWaveform(float[] samples) {
        acceptWaveform(handle(), samples);
    }

    /**
     * @return the result of the native {@code empty} call
     * @throws IllegalStateException if no native object is attached
     */
    public boolean empty() {
        return empty(handle());
    }

    /**
     * Invokes the native {@code pop} call.
     *
     * @throws IllegalStateException if no native object is attached
     */
    public void pop() {
        pop(handle());
    }

    /**
     * Invokes the native {@code clear} call.
     *
     * @throws IllegalStateException if no native object is attached
     */
    public void clear() {
        clear(handle());
    }

    /**
     * Invokes the native {@code reset} call.
     *
     * @throws IllegalStateException if no native object is attached
     */
    public void reset() {
        reset(handle());
    }

    /**
     * Wraps the result of the native {@code front} call in a
     * {@link SpeechSegment}.
     *
     * @return a segment built from the two elements returned by native code
     * @throws IllegalStateException if no native object is attached
     */
    public SpeechSegment front() {
        // The native side returns a two-element array: [0] is the start
        // (unboxed to int), [1] is the float[] of samples.
        Object[] arr = front(handle());
        int start = (int) arr[0];
        float[] samples = (float[]) arr[1];
        return new SpeechSegment(start, samples);
    }

    /**
     * @return the result of the native {@code isSpeechDetected} call
     * @throws IllegalStateException if no native object is attached
     */
    public boolean isSpeechDetected() {
        return isSpeechDetected(handle());
    }

    /**
     * Returns the native handle, refusing to hand out 0 so that native code
     * is never asked to dereference a missing object.
     */
    private long handle() {
        if (this.ptr == 0) {
            throw new IllegalStateException(
                "Vad has no native object attached (was release() already called?)");
        }
        return this.ptr;
    }

    private native void delete(long ptr);

    private native long newFromFile(VadModelConfig config);

    private native void acceptWaveform(long ptr, float[] samples);

    private native boolean empty(long ptr);

    private native void pop(long ptr);

    private native void clear(long ptr);

    private native Object[] front(long ptr);

    private native boolean isSpeechDetected(long ptr);

    private native void reset(long ptr);
}

View File

@@ -0,0 +1,80 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Immutable top-level VAD configuration: a {@link SileroVadModelConfig} plus
 * sample rate, thread count, debug flag and provider name.
 *
 * <p>Instances are created through {@link #builder()}.
 */
public class VadModelConfig {
    // NOTE(review): field names are presumably looked up by name from native
    // code - confirm before renaming any of them.
    private final SileroVadModelConfig sileroVadModelConfig;
    private final int sampleRate;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    /** Snapshots the current state of {@code source}. */
    private VadModelConfig(Builder source) {
        sileroVadModelConfig = source.sileroVadModelConfig;
        sampleRate = source.sampleRate;
        numThreads = source.numThreads;
        debug = source.debug;
        provider = source.provider;
    }

    /** @return a fresh builder pre-populated with the default values */
    public static Builder builder() {
        return new Builder();
    }

    /** @return the nested Silero VAD settings */
    public SileroVadModelConfig getSileroVadModelConfig() {
        return this.sileroVadModelConfig;
    }

    /** @return the configured sample rate (16000 by default) */
    public int getSampleRate() {
        return this.sampleRate;
    }

    /** @return the configured number of threads (1 by default) */
    public int getNumThreads() {
        return this.numThreads;
    }

    /** @return the configured provider name ("cpu" by default) */
    public String getProvider() {
        return this.provider;
    }

    /** @return the debug flag (true by default) */
    public boolean getDebug() {
        return this.debug;
    }

    /**
     * Fluent, mutable builder for {@link VadModelConfig}. Each setter returns
     * the same builder so that calls can be chained.
     */
    public static class Builder {
        // Starts out as an all-defaults Silero config.
        private SileroVadModelConfig sileroVadModelConfig = SileroVadModelConfig.builder().build();
        private int sampleRate = 16000;
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public Builder setSileroVadModelConfig(SileroVadModelConfig sileroVadModelConfig) {
            this.sileroVadModelConfig = sileroVadModelConfig;
            return this;
        }

        public Builder setSampleRate(int sampleRate) {
            this.sampleRate = sampleRate;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }

        /** @return a new immutable config holding this builder's current values */
        public VadModelConfig build() {
            return new VadModelConfig(this);
        }
    }
}

View File

@@ -0,0 +1,15 @@
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
/**
 * Writes audio samples to a file through a native method in the
 * {@code sherpa-onnx-jni} library.
 */
public class WaveWriter {
    static {
        // Load the library that provides writeWaveToFile so that
        // WaveWriter.write works even when no other class has loaded it yet.
        // Repeated loads of the same library are ignored by the runtime.
        System.loadLibrary("sherpa-onnx-jni");
    }

    public WaveWriter() {
    }

    /**
     * Passes the arguments to the native writer.
     *
     * @param filename name of the file to write
     * @param samples audio samples to write
     * @param sampleRate sample rate passed to the native writer
     * @return the boolean reported by the native writer
     */
    public static boolean write(String filename, float[] samples, int sampleRate) {
        // The native method is an instance method, so a throwaway instance is needed.
        WaveWriter w = new WaveWriter();
        return w.writeWaveToFile(filename, samples, sampleRate);
    }

    private native boolean writeWaveToFile(String filename, float[] samples, int sampleRate);
}