From 757a44b11632c83db1470cd6620ea7d180e518ba Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Sun, 23 Jun 2024 18:34:18 +0800
Subject: [PATCH] Add VAD + microphone example for Java API. (#1045)

---
 java-api-examples/README.md           |   6 ++
 java-api-examples/VadFromMic.java     | 125 ++++++++++++++++++++++++++
 java-api-examples/run-vad-from-mic.sh |  35 ++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 java-api-examples/VadFromMic.java
 create mode 100755 java-api-examples/run-vad-from-mic.sh

diff --git a/java-api-examples/README.md b/java-api-examples/README.md
index 18f53fae..420fdb71 100755
--- a/java-api-examples/README.md
+++ b/java-api-examples/README.md
@@ -57,6 +57,12 @@ The punctuation model supports both English and Chinese.
 ./run-speaker-identification.sh
 ```
 
+## VAD with a microphone
+
+```bash
+./run-vad-from-mic.sh
+```
+
 ## VAD (Remove silence)
 
 ```bash
diff --git a/java-api-examples/VadFromMic.java b/java-api-examples/VadFromMic.java
new file mode 100644
index 00000000..95fd3b19
--- /dev/null
+++ b/java-api-examples/VadFromMic.java
@@ -0,0 +1,125 @@
+// Copyright 2024 Xiaomi Corporation
+
+// This file shows how to use a silero_vad model to detect speech
+// and save detected speech into a wave file.
+
+import com.k2fsa.sherpa.onnx.*;
+import javax.sound.sampled.*;
+
+/**
+ * Reads 16 kHz, 16-bit, mono audio from a microphone, feeds it to a silero VAD,
+ * and saves every detected speech segment to its own wave file.
+ */
+public class VadFromMic {
+  /**
+   * Converts signed 16-bit little-endian PCM bytes to floats by dividing by 32768.
+   *
+   * @param buffer raw PCM bytes; must hold at least {@code 2 * samples.length} bytes
+   * @param samples destination array; every element is overwritten
+   */
+  private static void pcm16ToFloat(byte[] buffer, float[] samples) {
+    for (int i = 0; i != samples.length; ++i) {
+      // The low byte carries no sign: mask it so that values >= 0x80 are not
+      // sign-extended. The high byte keeps its sign, which is the sample's sign.
+      int low = buffer[2 * i] & 0xff;
+      int high = buffer[2 * i + 1];
+      samples[i] = ((high << 8) | low) / 32768.0f;
+    }
+  }
+
+  /**
+   * Runs the capture loop until the line is closed or an exception is thrown.
+   *
+   * @param args unused
+   */
+  public static void main(String[] args) {
+    int sampleRate = 16000;
+    int windowSize = 512;
+    // please download ./silero_vad.onnx from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    String model = "./silero_vad.onnx";
+    SileroVadModelConfig sileroVad =
+        SileroVadModelConfig.builder()
+            .setModel(model)
+            .setThreshold(0.5f)
+            .setMinSilenceDuration(0.25f)
+            .setMinSpeechDuration(0.5f)
+            .setWindowSize(windowSize)
+            .build();
+
+    VadModelConfig config =
+        VadModelConfig.builder()
+            .setSileroVadModelConfig(sileroVad)
+            .setSampleRate(sampleRate)
+            .setNumThreads(1)
+            .setDebug(true)
+            .setProvider("cpu")
+            .build();
+
+    Vad vad = new Vad(config);
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
+    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
+    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
+    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+    TargetDataLine targetDataLine;
+    try {
+      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
+      targetDataLine.open(format);
+      targetDataLine.start();
+    } catch (LineUnavailableException | IllegalArgumentException e) {
+      // getLine() throws IllegalArgumentException when no line matches info.
+      System.out.println("Failed to open target data line: " + e.getMessage());
+      vad.release();
+      return;
+    }
+
+    boolean printed = false;
+    int index = 0;
+
+    byte[] buffer = new byte[windowSize * 2];
+    float[] samples = new float[windowSize];
+
+    try {
+      while (targetDataLine.isOpen()) {
+        int n = targetDataLine.read(buffer, 0, buffer.length);
+        if (n != buffer.length) {
+          // An incomplete read leaves stale bytes from the previous window in
+          // the buffer, so skip it instead of feeding it to the VAD.
+          System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
+          continue;
+        }
+        pcm16ToFloat(buffer, samples);
+
+        vad.acceptWaveform(samples);
+        if (vad.isSpeechDetected() && !printed) {
+          System.out.println("Detected speech");
+          printed = true;
+        }
+
+        if (!vad.isSpeechDetected()) {
+          printed = false;
+        }
+
+        while (!vad.empty()) {
+          float[] segment = vad.front().getSamples();
+          float duration = segment.length / (float) sampleRate;
+          System.out.printf("Duration: %.3f seconds\n", duration);
+
+          String filename = String.format("seg-%d-%.3fs.wav", index, duration);
+          index += 1;
+          WaveWriter.write(filename, segment, sampleRate);
+          System.out.printf("Saved to %s\n", filename);
+          System.out.println("----------");
+          vad.pop();
+        }
+      }
+    } finally {
+      // Release the audio line and the VAD even if the loop throws.
+      targetDataLine.close();
+      vad.release();
+    }
+  }
+}
diff --git a/java-api-examples/run-vad-from-mic.sh b/java-api-examples/run-vad-from-mic.sh
new file mode 100755
index 00000000..320e97cb
--- /dev/null
+++ b/java-api-examples/run-vad-from-mic.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+if [ ! -f ./silero_vad.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+java \
+  -Djava.library.path="$PWD/../build/lib" \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  ./VadFromMic.java