diff --git a/java-api-examples/README.md b/java-api-examples/README.md index 420fdb71..be8018ad 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -63,6 +63,18 @@ The punctuation model supports both English and Chinese. ./run-vad-from-mic.sh ``` +## VAD with a microphone + Non-streaming Paraformer for speech recognition + +```bash +./run-vad-from-mic-non-streaming-paraformer.sh +``` + +## VAD with a microphone + Non-streaming Whisper tiny.en for speech recognition + +```bash +./run-vad-from-mic-non-streaming-whisper.sh +``` + ## VAD (Remove silence) ```bash diff --git a/java-api-examples/VadFromMicWithNonStreamingParaformer.java b/java-api-examples/VadFromMicWithNonStreamingParaformer.java new file mode 100644 index 00000000..6363ea9a --- /dev/null +++ b/java-api-examples/VadFromMicWithNonStreamingParaformer.java @@ -0,0 +1,146 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a silero_vad model with a non-streaming Paraformer +// for speech recognition. 
+
+import com.k2fsa.sherpa.onnx.*;
+import javax.sound.sampled.*;
+
+public class VadFromMicWithNonStreamingParaformer {
+  private static final int sampleRate = 16000;
+  private static final int windowSize = 512;
+
+  /** Builds the silero VAD for windowSize-sample windows at sampleRate Hz. */
+  public static Vad createVad() {
+    // please download ./silero_vad.onnx from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    SileroVadModelConfig sileroVad =
+        SileroVadModelConfig.builder()
+            .setModel("./silero_vad.onnx")
+            .setThreshold(0.5f)
+            .setMinSilenceDuration(0.25f)
+            .setMinSpeechDuration(0.5f)
+            .setWindowSize(windowSize)
+            .build();
+
+    VadModelConfig config =
+        VadModelConfig.builder()
+            .setSileroVadModelConfig(sileroVad)
+            .setSampleRate(sampleRate)
+            .setNumThreads(1)
+            .setDebug(true)
+            .setProvider("cpu")
+            .build();
+
+    return new Vad(config);
+  }
+
+  /** Builds the non-streaming Paraformer recognizer (greedy search, rule FST applied). */
+  public static OfflineRecognizer createOfflineRecognizer() {
+    // please refer to
+    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english
+    // to download model files
+    String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
+    String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
+
+    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+    String ruleFsts = "./itn_zh_number.fst";
+
+    OfflineParaformerModelConfig paraformer =
+        OfflineParaformerModelConfig.builder().setModel(model).build();
+
+    OfflineModelConfig modelConfig =
+        OfflineModelConfig.builder()
+            .setParaformer(paraformer)
+            .setTokens(tokens)
+            .setNumThreads(1)
+            .setDebug(true)
+            .build();
+
+    OfflineRecognizerConfig config =
+        OfflineRecognizerConfig.builder()
+            .setOfflineModelConfig(modelConfig)
+            .setDecodingMethod("greedy_search")
+            .setRuleFsts(ruleFsts)
+            .build();
+
+    return new OfflineRecognizer(config);
+  }
+
+  /** Reads the microphone, cuts speech segments with the VAD and prints each transcript. */
+  public static void main(String[] args) {
+    Vad vad = createVad();
+    OfflineRecognizer recognizer = createOfflineRecognizer();
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
+    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
+    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
+    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+    TargetDataLine targetDataLine;
+    try {
+      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
+      targetDataLine.open(format);
+      targetDataLine.start();
+    } catch (LineUnavailableException e) {
+      System.out.println("Failed to open target data line: " + e.getMessage());
+      vad.release();
+      recognizer.release();
+      return;
+    }
+
+    boolean printed = false;
+    byte[] buffer = new byte[windowSize * 2];
+    float[] samples = new float[windowSize];
+
+    System.out.println("Started. Please speak");
+    boolean running = true;
+    while (targetDataLine.isOpen() && running) {
+      int n = targetDataLine.read(buffer, 0, buffer.length);
+      if (n != buffer.length) { // short read: rest of buffer still holds the previous window
+        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
+        continue;
+      }
+      for (int i = 0; i != windowSize; ++i) {
+        // little endian; mask the low byte (Java bytes are signed) so it is not sign-extended
+        int low = buffer[2 * i] & 0xff;
+        int high = buffer[2 * i + 1]; // sign-extended on purpose: carries the sample's sign
+        samples[i] = ((high << 8) | low) / 32768.0f;
+      }
+
+      vad.acceptWaveform(samples);
+      boolean speech = vad.isSpeechDetected();
+      if (speech && !printed) {
+        System.out.println("Detected speech");
+      }
+      printed = speech; // print once per speech run; re-armed as soon as speech ends
+
+      while (!vad.empty()) {
+        SpeechSegment segment = vad.front();
+        float startTime = segment.getStart() / (float) sampleRate;
+        float duration = segment.getSamples().length / (float) sampleRate;
+
+        OfflineStream stream = recognizer.createStream();
+        stream.acceptWaveform(segment.getSamples(), sampleRate);
+        recognizer.decode(stream);
+        String text = recognizer.getResult(stream).getText();
+        stream.release();
+
+        if (!text.isEmpty()) {
+          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+        }
+
+        if (text.contains("退出程序")) { // spoken stop phrase, Chinese for "exit the program"
+          running = false;
+        }
+
+        vad.pop();
+      }
+    }
+
+    targetDataLine.close(); // give the microphone back before releasing the native objects
+    vad.release();
+    recognizer.release();
+  }
+}
diff --git a/java-api-examples/VadFromMicWithNonStreamingWhisper.java b/java-api-examples/VadFromMicWithNonStreamingWhisper.java
new file mode 100644
index 00000000..a80ddeb7
--- /dev/null
+++ b/java-api-examples/VadFromMicWithNonStreamingWhisper.java
@@ -0,0 +1,143 @@
+// Copyright 2024 Xiaomi Corporation
+
+// This file shows how to use a silero_vad model with a non-streaming Whisper tiny.en
+// for speech recognition.
+
+import com.k2fsa.sherpa.onnx.*;
+import javax.sound.sampled.*;
+
+public class VadFromMicNonStreamingWhisper { // NOTE(review): name differs from the file name
+  private static final int sampleRate = 16000;
+  private static final int windowSize = 512;
+
+  /** Builds the silero VAD for windowSize-sample windows at sampleRate Hz. */
+  public static Vad createVad() {
+    // please download ./silero_vad.onnx from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    SileroVadModelConfig sileroVad =
+        SileroVadModelConfig.builder()
+            .setModel("./silero_vad.onnx")
+            .setThreshold(0.5f)
+            .setMinSilenceDuration(0.25f)
+            .setMinSpeechDuration(0.5f)
+            .setWindowSize(windowSize)
+            .build();
+
+    VadModelConfig config =
+        VadModelConfig.builder()
+            .setSileroVadModelConfig(sileroVad)
+            .setSampleRate(sampleRate)
+            .setNumThreads(1)
+            .setDebug(true)
+            .setProvider("cpu")
+            .build();
+
+    return new Vad(config);
+  }
+
+  /** Builds the non-streaming Whisper tiny.en recognizer (greedy search). */
+  public static OfflineRecognizer createOfflineRecognizer() {
+    // please refer to
+    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
+    // to download model files
+    String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
+    String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
+    String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";
+
+    OfflineWhisperModelConfig whisper =
+        OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();
+
+    OfflineModelConfig modelConfig =
+        OfflineModelConfig.builder()
+            .setWhisper(whisper)
+            .setTokens(tokens)
+            .setNumThreads(1)
+            .setDebug(true)
+            .build();
+
+    OfflineRecognizerConfig config =
+        OfflineRecognizerConfig.builder()
+            .setOfflineModelConfig(modelConfig)
+            .setDecodingMethod("greedy_search")
+            .build();
+
+    return new OfflineRecognizer(config);
+  }
+
+  /** Reads the microphone, cuts speech segments with the VAD and prints each transcript. */
+  public static void main(String[] args) {
+    Vad vad = createVad();
+    OfflineRecognizer recognizer = createOfflineRecognizer();
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
+    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
+    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
+
+    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
+    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
+    TargetDataLine targetDataLine;
+    try {
+      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
+      targetDataLine.open(format);
+      targetDataLine.start();
+    } catch (LineUnavailableException e) {
+      System.out.println("Failed to open target data line: " + e.getMessage());
+      vad.release();
+      recognizer.release();
+      return;
+    }
+
+    boolean printed = false;
+    byte[] buffer = new byte[windowSize * 2];
+    float[] samples = new float[windowSize];
+
+    System.out.println("Started. Please speak");
+    boolean running = true;
+    while (targetDataLine.isOpen() && running) {
+      int n = targetDataLine.read(buffer, 0, buffer.length);
+      if (n != buffer.length) { // short read: rest of buffer still holds the previous window
+        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
+        continue;
+      }
+      for (int i = 0; i != windowSize; ++i) {
+        // little endian; mask the low byte (Java bytes are signed) so it is not sign-extended
+        int low = buffer[2 * i] & 0xff;
+        int high = buffer[2 * i + 1]; // sign-extended on purpose: carries the sample's sign
+        samples[i] = ((high << 8) | low) / 32768.0f;
+      }
+
+      vad.acceptWaveform(samples);
+      boolean speech = vad.isSpeechDetected();
+      if (speech && !printed) {
+        System.out.println("Detected speech");
+      }
+      printed = speech; // print once per speech run; re-armed as soon as speech ends
+
+      while (!vad.empty()) {
+        SpeechSegment segment = vad.front();
+        float startTime = segment.getStart() / (float) sampleRate;
+        float duration = segment.getSamples().length / (float) sampleRate;
+
+        OfflineStream stream = recognizer.createStream();
+        stream.acceptWaveform(segment.getSamples(), sampleRate);
+        recognizer.decode(stream);
+        String text = recognizer.getResult(stream).getText();
+        stream.release();
+
+        if (!text.isEmpty()) {
+          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
+        }
+
+        if (text.contains("exit the program")) { // spoken stop phrase ends the capture loop
+          running = false;
+        }
+
+        vad.pop();
+      }
+    }
+
+    targetDataLine.close(); // give the microphone back before releasing the native objects
+    vad.release();
+    recognizer.release();
+  }
+}
diff --git a/java-api-examples/run-vad-from-mic-non-streaming-paraformer.sh b/java-api-examples/run-vad-from-mic-non-streaming-paraformer.sh
new file mode 100755
index 00000000..edb97379
--- /dev/null
+++ b/java-api-examples/run-vad-from-mic-non-streaming-paraformer.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+if [ ! -f ./silero_vad.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+
+  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+fi
+
+if [ ! -f ./itn_zh_number.fst ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+fi
+
+java \
+  -Djava.library.path=$PWD/../build/lib \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  ./VadFromMicWithNonStreamingParaformer.java
diff --git a/java-api-examples/run-vad-from-mic-non-streaming-whisper.sh b/java-api-examples/run-vad-from-mic-non-streaming-whisper.sh
new file mode 100755
index 00000000..63c661d1
--- /dev/null
+++ b/java-api-examples/run-vad-from-mic-non-streaming-whisper.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! 
-f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./silero_vad.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +fi + +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 + + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 + rm sherpa-onnx-whisper-tiny.en.tar.bz2 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + ./VadFromMicWithNonStreamingWhisper.java diff --git a/java-api-examples/src/DecodeFile.java b/java-api-examples/src/DecodeFile.java deleted file mode 100644 index c12cf3a8..00000000 --- a/java-api-examples/src/DecodeFile.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * // Copyright 2022-2023 by zhaoming - */ -/* -Config modelconfig.cfg - sample_rate=16000 - feature_dim=80 - rule1_min_trailing_silence=2.4 - rule2_min_trailing_silence=1.2 - rule3_min_utterance_length=20 - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt - num_threads=4 - 
enable_endpoint_detection=false - decoding_method=greedy_search - max_active_paths=4 -*/ - -import com.k2fsa.sherpa.onnx.OnlineRecognizer; -import com.k2fsa.sherpa.onnx.OnlineStream; -import java.io.*; -import java.nio.charset.StandardCharsets; - -public class DecodeFile { - OnlineRecognizer rcgOjb; - OnlineStream streamObj; - String wavfilename; - - public DecodeFile(String fileName) { - wavfilename = fileName; - } - - public void initModelWithPara() { - try { - String modelDir = - "/sherpa-onnx/build_old/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"; - String encoder = modelDir + "/encoder-epoch-99-avg-1.onnx"; - String decoder = modelDir + "/decoder-epoch-99-avg-1.onnx"; - String joiner = modelDir + "/joiner-epoch-99-avg-1.onnx"; - String tokens = modelDir + "/tokens.txt"; - int numThreads = 4; - int sampleRate = 16000; - int featureDim = 80; - boolean enableEndpointDetection = false; - float rule1MinTrailingSilence = 2.4F; - float rule2MinTrailingSilence = 1.2F; - float rule3MinUtteranceLength = 20F; - String decodingMethod = "greedy_search"; - int maxActivePaths = 4; - String hotwordsFile = ""; - float hotwordsScore = 1.5F; - String lm_model = ""; - float lm_scale = 0.5F; - String modelType = "zipformer"; - rcgOjb = - new OnlineRecognizer( - tokens, - encoder, - decoder, - joiner, - numThreads, - sampleRate, - featureDim, - enableEndpointDetection, - rule1MinTrailingSilence, - rule2MinTrailingSilence, - rule3MinUtteranceLength, - decodingMethod, - lm_model, - lm_scale, - maxActivePaths, - hotwordsFile, - hotwordsScore, - modelType); - streamObj = rcgOjb.createStream(); - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - public void initModelWithCfg(String cfgFile) { - try { - // you should set setCfgPath() before running this - rcgOjb = new OnlineRecognizer(cfgFile); - streamObj = rcgOjb.createStream(); - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - public void 
simpleExample() { - try { - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file - streamObj.acceptWaveform(buffer); // feed stream with data - streamObj.inputFinished(); // tell engine you done with all data - OnlineStream ssObj[] = new OnlineStream[1]; - while (rcgOjb.isReady(streamObj)) { // engine is ready for unprocessed data - ssObj[0] = streamObj; - rcgOjb.decodeStreams(ssObj); // decode for multiple stream - // rcgOjb.DecodeStream(streamObj); // decode for single stream - } - - String recText = "simple:" + rcgOjb.getResult(streamObj) + "\n"; - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); - System.out.println(new String(utf8Data)); - rcgOjb.reSet(streamObj); - rcgOjb.releaseStream(streamObj); // release stream - rcgOjb.release(); // release recognizer - - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - public void streamExample() { - try { - float[] buffer = rcgOjb.readWavFile(wavfilename); // read data from file - float[] chunk = new float[1600]; // //each time read 1600(0.1s) data - int chunkIndex = 0; - for (int i = 0; i < buffer.length; i++) // total wav length loop - { - chunk[chunkIndex] = buffer[i]; - chunkIndex++; - if (chunkIndex >= 1600 || i == (buffer.length - 1)) { - chunkIndex = 0; - streamObj.acceptWaveform(chunk); // feed chunk - if (rcgOjb.isReady(streamObj)) { - rcgOjb.decodeStream(streamObj); - } - String testDate = rcgOjb.getResult(streamObj); - byte[] utf8Data = testDate.getBytes(StandardCharsets.UTF_8); - - if (utf8Data.length > 0) { - System.out.println(Float.valueOf((float) i / 16000) + ":" + new String(utf8Data)); - } - } - } - streamObj.inputFinished(); - while (rcgOjb.isReady(streamObj)) { - rcgOjb.decodeStream(streamObj); - } - - String recText = "stream:" + rcgOjb.getResult(streamObj) + "\n"; - byte[] utf8Data = recText.getBytes(StandardCharsets.UTF_8); - System.out.println(new String(utf8Data)); - rcgOjb.reSet(streamObj); - rcgOjb.releaseStream(streamObj); // 
release stream - rcgOjb.release(); // release recognizer - - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - public static void main(String[] args) { - try { - String appDir = System.getProperty("user.dir"); - System.out.println("appdir=" + appDir); - String fileName = appDir + "/" + args[0]; - String cfgPath = appDir + "/modeltest.cfg"; - String soPath = appDir + "/../build/lib/libsherpa-onnx-jni.so"; - OnlineRecognizer.setSoPath(soPath); - DecodeFile rcgDemo = new DecodeFile(fileName); - - // ***************** */ - rcgDemo.initModelWithCfg(cfgPath); - rcgDemo.streamExample(); - // **************** */ - rcgDemo.initModelWithCfg(cfgPath); - rcgDemo.simpleExample(); - - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } -} diff --git a/java-api-examples/src/DecodeMic.java b/java-api-examples/src/DecodeMic.java deleted file mode 100755 index 76d00679..00000000 --- a/java-api-examples/src/DecodeMic.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * // Copyright 2022-2023 by zhaoming - */ -/* -Real-time speech recognition from a microphone with com.k2fsa.sherpa.onnx Java API - -example for cfgFile modelconfig.cfg - sample_rate=16000 - feature_dim=80 - rule1_min_trailing_silence=2.4 - rule2_min_trailing_silence=1.2 - rule3_min_utterance_length=20 - encoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx - decoder=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx - joiner=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx - tokens=/sherpa-onnx/build/bin/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt - num_threads=4 - enable_endpoint_detection=true - decoding_method=greedy_search - max_active_paths=4 - -*/ -import com.k2fsa.sherpa.onnx.OnlineRecognizer; -import com.k2fsa.sherpa.onnx.OnlineStream; -import java.io.*; 
-import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.ShortBuffer; -import java.nio.charset.StandardCharsets; -import javax.sound.sampled.AudioFormat; -import javax.sound.sampled.AudioSystem; -import javax.sound.sampled.DataLine; -import javax.sound.sampled.TargetDataLine; - -/** Microphone Example */ -public class DecodeMic { - MicRcgThread micRcgThread = null; // thread handle - - OnlineRecognizer rcgOjb; // the recognizer - - OnlineStream streamObj; // the stream - - public DecodeMic() { - - micRcgThread = new MicRcgThread(); // create a new instance for MicRcgThread - } - - public void open() { - micRcgThread.start(); // start to capture microphone data - } - - public void close() { - micRcgThread.stop(); // close capture - } - - /** init asr engine with config file */ - public void initModelWithCfg(String cfgFile) { - try { - - // set setSoPath() before running this - rcgOjb = new OnlineRecognizer(cfgFile); - - streamObj = rcgOjb.createStream(); // create a stream for asr engine to feed data - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - /** read data from mic and feed to asr engine */ - class MicRcgThread implements Runnable { - - TargetDataLine capline; // line for capture mic data - - Thread thread; // this thread - int segmentId = 0; // record the segment id when detect endpoint - String preText = ""; // decoded text - - public MicRcgThread() {} - - public void start() { - - thread = new Thread(this); - - thread.start(); // start thread - } - - public void stop() { - capline.stop(); - capline.close(); - capline = null; - thread = null; - } - - /** feed captured microphone data to asr */ - public void decodeSample(byte[] samplebytes) { - try { - ByteBuffer byteBuf = ByteBuffer.wrap(samplebytes); // create a bytebuf for samples - byteBuf.order(ByteOrder.LITTLE_ENDIAN); // set bytebuf to little endian - ShortBuffer shortBuf = byteBuf.asShortBuffer(); // covert to short type - short[] arrShort = new 
short[shortBuf.capacity()]; // array for copy short data - float[] arrFloat = new float[shortBuf.capacity()]; // array for copy float data - shortBuf.get(arrShort); // put date to arrShort - - for (int i = 0; i < arrShort.length; i++) { - arrFloat[i] = arrShort[i] / 32768f; // loop to covert short data to float -1 to 1 - } - streamObj.acceptWaveform(arrFloat); // feed asr engine with float data - while (rcgOjb.isReady(streamObj)) { // if engine is ready for unprocessed data - - rcgOjb.decodeStream(streamObj); // decode for this stream - } - boolean isEndpoint = - rcgOjb.isEndpoint( - streamObj); // endpoint check, make sure enable_endpoint_detection=true in config - // file - String nowText = rcgOjb.getResult(streamObj); // get asr result - String recText = ""; - byte[] utf8Data; // for covert text to utf8 - if (isEndpoint && nowText.length() > 0) { - rcgOjb.reSet(streamObj); // reSet stream when detect endpoint - segmentId++; - preText = nowText; - recText = "text(seg_" + String.valueOf(segmentId) + "):" + nowText + "\n"; - utf8Data = recText.getBytes(StandardCharsets.UTF_8); - System.out.println(new String(utf8Data)); - } - - if (!nowText.equals(preText)) { // if preText not equal nowtext - preText = nowText; - recText = nowText + "\n"; - utf8Data = recText.getBytes(StandardCharsets.UTF_8); - System.out.println(new String(utf8Data)); - } - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } - - /** run mic capture thread */ - public void run() { - System.out.println("Started! 
Please speak..."); - - AudioFormat.Encoding encoding = AudioFormat.Encoding.PCM_SIGNED; // the pcm format - float rate = 16000.0f; // using 16 kHz - int channels = 1; // single channel - int sampleSize = 16; // sampleSize 16bit - boolean isBigEndian = false; // using little endian - - AudioFormat format = - new AudioFormat( - encoding, rate, sampleSize, channels, (sampleSize / 8) * channels, rate, isBigEndian); - - DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); - - // check system support such data format - if (!AudioSystem.isLineSupported(info)) { - System.out.println(info + " not supported."); - return; - } - - // open a line for capture. - - try { - capline = (TargetDataLine) AudioSystem.getLine(info); - capline.open(format, capline.getBufferSize()); - } catch (Exception ex) { - System.out.println(ex); - return; - } - - // the buf size for mic captured each time - int bufferLengthInBytes = capline.getBufferSize() / 8 * format.getFrameSize(); - byte[] micData = new byte[bufferLengthInBytes]; - int numBytesRead; - - capline.start(); // start to capture mic data - - while (thread != null) { - // read data from line - if ((numBytesRead = capline.read(micData, 0, bufferLengthInBytes)) == -1) { - break; - } - - decodeSample(micData); // decode mic data - } - - // stop and close - - try { - if (capline != null) { - capline.stop(); - capline.close(); - capline = null; - } - - } catch (Exception ex) { - System.err.println(ex); - } - } - } // End class DecodeMic - - public static void main(String s[]) { - try { - String appDir = System.getProperty("user.dir"); - System.out.println("appdir=" + appDir); - String cfgPath = appDir + "/modelconfig.cfg"; - String soPath = appDir + "/../build/lib/libsherpa-onnx-jni.so"; - OnlineRecognizer.setSoPath(soPath); // set so. 
lib for OnlineRecognizer - - DecodeMic decodeEx = new DecodeMic(); - decodeEx.initModelWithCfg(cfgPath); // init asr engine - decodeEx.open(); // open thread for mic - System.out.print("Press Enter to EXIT!\n"); - char i = (char) System.in.read(); - decodeEx.close(); - } catch (Exception e) { - System.err.println(e); - e.printStackTrace(); - } - } -}