diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index d865d5b5..fbc1d671 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -106,6 +106,17 @@ jobs: make -j4 ls -lh lib + - name: Run java test (audio tagging) + shell: bash + run: | + cd ./java-api-examples + ./run-audio-tagging-zipformer-from-file.sh + # Delete model files to save space + rm -rf sherpa-onnx-zipformer-* + + ./run-audio-tagging-ced-from-file.sh + rm -rf sherpa-onnx-ced-* + - name: Run java test (add punctuations) shell: bash run: | diff --git a/.gitignore b/.gitignore index 40b77227..0d0d04e8 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,4 @@ sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 *.tar.gz *.tar.bz2 *.zip +sherpa-onnx-ced-* diff --git a/java-api-examples/AudioTaggingCEDFromFile.java b/java-api-examples/AudioTaggingCEDFromFile.java new file mode 100644 index 00000000..777bc047 --- /dev/null +++ b/java-api-examples/AudioTaggingCEDFromFile.java @@ -0,0 +1,60 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a CED audio tagging model to tag +// input audio files. 
+import com.k2fsa.sherpa.onnx.*; + +public class AudioTaggingCEDFromFile { + public static void main(String[] args) { + // please download the model from + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models + String model = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx"; + String labels = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv"; + int topK = 5; + + String[] testWaves = + new String[] { + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/2.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/3.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/4.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/5.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/7.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/8.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/9.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/10.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/11.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/12.wav", + "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/13.wav", + }; + + AudioTaggingModelConfig modelConfig = + AudioTaggingModelConfig.builder().setCED(model).setNumThreads(1).setDebug(true).build(); + + AudioTaggingConfig config = + AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build(); + + AudioTagging tagger = new AudioTagging(config); + System.out.println("------"); + for (String filename : testWaves) { + WaveReader reader = new WaveReader(filename); + + OfflineStream stream = tagger.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + AudioEvent[] events = tagger.compute(stream); + + stream.release(); + + 
System.out.printf("input file: %s\n", filename); + System.out.printf("Probability\t\tName\n"); + for (AudioEvent e : events) { + System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName()); + } + System.out.println("------"); + } + + tagger.release(); + } +} diff --git a/java-api-examples/AudioTaggingZipformerFromFile.java b/java-api-examples/AudioTaggingZipformerFromFile.java new file mode 100644 index 00000000..f48ca26a --- /dev/null +++ b/java-api-examples/AudioTaggingZipformerFromFile.java @@ -0,0 +1,68 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a zipformer audio tagging model to tag +// input audio files. +import com.k2fsa.sherpa.onnx.*; + +public class AudioTaggingZipformerFromFile { + public static void main(String[] args) { + // please download the model from + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models + String model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx"; + String labels = + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv"; + int topK = 5; + + String[] testWaves = + new String[] { + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/2.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/4.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/5.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/6.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/7.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/8.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/9.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/10.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/11.wav", + 
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/12.wav", + "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/13.wav", + }; + + OfflineZipformerAudioTaggingModelConfig zipformer = + OfflineZipformerAudioTaggingModelConfig.builder().setModel(model).build(); + + AudioTaggingModelConfig modelConfig = + AudioTaggingModelConfig.builder() + .setZipformer(zipformer) + .setNumThreads(1) + .setDebug(true) + .build(); + + AudioTaggingConfig config = + AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build(); + + AudioTagging tagger = new AudioTagging(config); + System.out.println("------"); + for (String filename : testWaves) { + WaveReader reader = new WaveReader(filename); + + OfflineStream stream = tagger.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + AudioEvent[] events = tagger.compute(stream); + + stream.release(); + + System.out.printf("input file: %s\n", filename); + System.out.printf("Probability\t\tName\n"); + for (AudioEvent e : events) { + System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName()); + } + System.out.println("------"); + } + + tagger.release(); + } +} diff --git a/java-api-examples/README.md b/java-api-examples/README.md index 51e8bb1f..89ca5f08 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -36,10 +36,17 @@ This directory contains examples for the JAVA API of sherpa-onnx. ./run-spoken-language-identification-whisper.sh ``` -## Add puncutations to text +## Add punctuations to text The punctuation model supports both English and Chinese. 
```bash ./run-add-punctuation-zh-en.sh ``` + +## Audio tagging + +```bash +./run-audio-tagging-zipformer-from-file.sh +./run-audio-tagging-ced-from-file.sh +``` diff --git a/java-api-examples/run-add-punctuation-zh-en.sh b/java-api-examples/run-add-punctuation-zh-en.sh index 4a440193..5d707ea8 100755 --- a/java-api-examples/run-add-punctuation-zh-en.sh +++ b/java-api-examples/run-add-punctuation-zh-en.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 diff --git a/java-api-examples/run-audio-tagging-ced-from-file.sh b/java-api-examples/run-audio-tagging-ced-from-file.sh new file mode 100755 index 00000000..35098901 --- /dev/null +++ b/java-api-examples/run-audio-tagging-ced-from-file.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! 
#!/usr/bin/env bash
#
# Runs the AudioTaggingZipformerFromFile.java example.
#
# All paths are relative to the current working directory, so run this script
# from the directory that contains it. Each step is skipped when its output
# already exists:
#   1. build the JNI shared library into ../build/lib
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download and unpack the zipformer audio tagging model
#   4. launch the example with java

set -ex

# Build the JNI library only if neither the .dylib nor the .so variant is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Fetch the model once; the archive is removed after extraction to save space.
if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is not split into
# several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingZipformerFromFile.java
-f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 diff --git a/java-api-examples/run-non-streaming-decode-file-transducer.sh b/java-api-examples/run-non-streaming-decode-file-transducer.sh index 99354e5a..b244d30b 100755 --- a/java-api-examples/run-non-streaming-decode-file-transducer.sh +++ b/java-api-examples/run-non-streaming-decode-file-transducer.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 diff --git a/java-api-examples/run-non-streaming-decode-file-whisper.sh b/java-api-examples/run-non-streaming-decode-file-whisper.sh index c60f8b62..317e90ae 100755 --- a/java-api-examples/run-non-streaming-decode-file-whisper.sh +++ b/java-api-examples/run-non-streaming-decode-file-whisper.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! 
-f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 diff --git a/java-api-examples/run-non-streaming-tts-coqui-de.sh b/java-api-examples/run-non-streaming-tts-coqui-de.sh index 69ffd85c..308d3e93 100755 --- a/java-api-examples/run-non-streaming-tts-coqui-de.sh +++ b/java-api-examples/run-non-streaming-tts-coqui-de.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - # please visit # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models # to download more models diff --git a/java-api-examples/run-non-streaming-tts-piper-en.sh b/java-api-examples/run-non-streaming-tts-piper-en.sh index 7d3ed2f5..61465faa 100755 --- a/java-api-examples/run-non-streaming-tts-piper-en.sh +++ b/java-api-examples/run-non-streaming-tts-piper-en.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. 
- - make -j4 - ls -lh lib -fi - # please visit # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models # to download more models diff --git a/java-api-examples/run-non-streaming-tts-vits-zh.sh b/java-api-examples/run-non-streaming-tts-vits-zh.sh index 2d8f85f5..5be38680 100755 --- a/java-api-examples/run-non-streaming-tts-vits-zh.sh +++ b/java-api-examples/run-non-streaming-tts-vits-zh.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - # please visit # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models # to download more models diff --git a/java-api-examples/run-spoken-language-identification-whisper.sh b/java-api-examples/run-spoken-language-identification-whisper.sh index 48523212..bf2b3cb2 100755 --- a/java-api-examples/run-spoken-language-identification-whisper.sh +++ b/java-api-examples/run-spoken-language-identification-whisper.sh @@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then popd fi -if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then - cmake \ - -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ - -DSHERPA_ONNX_ENABLE_TESTS=OFF \ - -DSHERPA_ONNX_ENABLE_CHECK=OFF \ - -DBUILD_SHARED_LIBS=ON \ - -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_ONNX_ENABLE_JNI=ON \ - .. - - make -j4 - ls -lh lib -fi - # Note that it needs a multilingual whisper model. so, for example, tiny works while tiny.en does not work # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 if [ ! 
/**
 * One audio tagging result: the event name, its index, and the probability assigned to it.
 *
 * <p>Instances are immutable; all three values are supplied to the constructor.
 */
public class AudioEvent {
  private final String name;
  private final int index;
  private final float prob;

  /**
   * @param name name of the audio event
   * @param index index of the audio event
   * @param prob probability assigned to the audio event
   */
  public AudioEvent(String name, int index, float prob) {
    this.name = name;
    this.index = index;
    this.prob = prob;
  }

  /** Returns the name of the audio event. */
  public String getName() {
    return name;
  }

  /** Returns the index of the audio event. */
  public int getIndex() {
    return index;
  }

  /** Returns the probability of the audio event. */
  public float getProb() {
    return prob;
  }

  /**
   * Returns a one-line description of this event, terminated by a newline. The probability is
   * printed with three decimal places.
   */
  @Override
  public String toString() {
    // The class name was previously misspelled as "AudioEven" in this output.
    return String.format("AudioEvent(name=%s, index=%d, prob=%.3f)\n", name, index, prob);
  }
}
Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class AudioTagging { + static { + System.loadLibrary("sherpa-onnx-jni"); + } + + private long ptr = 0; + + public AudioTagging(AudioTaggingConfig config) { + ptr = newFromFile(config); + } + + public OfflineStream createStream() { + long p = createStream(ptr); + return new OfflineStream(p); + } + + public AudioEvent[] compute(OfflineStream stream) { + return compute(stream, -1); + + } + + public AudioEvent[] compute(OfflineStream stream, int topK) { + Object[] arr = compute(ptr, stream.getPtr(), topK); + + AudioEvent[] events = new AudioEvent[arr.length]; + for (int i = 0; i < arr.length; ++i) { + Object[] obj = (Object[]) arr[i]; + String name = (String) obj[0]; + int index = (int) obj[1]; + float prob = (float) obj[2]; + events[i] = new AudioEvent(name, index, prob); + } + return events; + } + + + @Override + protected void finalize() throws Throwable { + release(); + } + + // You'd better call it manually if it is not used anymore + public void release() { + if (this.ptr == 0) { + return; + } + delete(this.ptr); + this.ptr = 0; + } + + private native void delete(long ptr); + + private native long newFromFile(AudioTaggingConfig config); + + private native long createStream(long ptr); + + private native Object[] compute(long ptr, long streamPtr, int topK); +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingConfig.java new file mode 100644 index 00000000..5c6b5009 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingConfig.java @@ -0,0 +1,44 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class AudioTaggingConfig { + private final AudioTaggingModelConfig model; + private final String labels; + private final int topK; + + private AudioTaggingConfig(Builder builder) { + this.model = builder.model; + this.labels = builder.labels; + 
this.topK = builder.topK; + } + + public static Builder builder() { + return new AudioTaggingConfig.Builder(); + } + + public static class Builder { + private AudioTaggingModelConfig model = AudioTaggingModelConfig.builder().build(); + private String labels = ""; + private int topK = 5; + + public AudioTaggingConfig build() { + return new AudioTaggingConfig(this); + } + + public Builder setModel(AudioTaggingModelConfig model) { + this.model = model; + return this; + } + + public Builder setLabels(String labels) { + this.labels = labels; + return this; + } + + public Builder setTopK(int topK) { + this.topK = topK; + return this; + } + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingModelConfig.java new file mode 100644 index 00000000..5521c980 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/AudioTaggingModelConfig.java @@ -0,0 +1,60 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class AudioTaggingModelConfig { + private final OfflineZipformerAudioTaggingModelConfig zipformer; + private final String ced; + private final int numThreads; + private final boolean debug; + private final String provider; + + private AudioTaggingModelConfig(Builder builder) { + this.zipformer = builder.zipformer; + this.ced = builder.ced; + this.numThreads = builder.numThreads; + this.debug = builder.debug; + this.provider = builder.provider; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private OfflineZipformerAudioTaggingModelConfig zipformer = OfflineZipformerAudioTaggingModelConfig.builder().build(); + private String ced = ""; + private int numThreads = 1; + private boolean debug = true; + private String provider = "cpu"; + + public AudioTaggingModelConfig build() { + return new AudioTaggingModelConfig(this); + } + + public Builder 
setZipformer(OfflineZipformerAudioTaggingModelConfig zipformer) { + this.zipformer = zipformer; + return this; + } + + public Builder setCED(String ced) { + this.ced = ced; + return this; + } + + public Builder setNumThreads(int numThreads) { + this.numThreads = numThreads; + return this; + } + + public Builder setDebug(boolean debug) { + this.debug = debug; + return this; + } + + public Builder setProvider(String provider) { + this.provider = provider; + return this; + } + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/EndpointRule.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/EndpointRule.java index 97a5dbb3..3744ae22 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/EndpointRule.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/EndpointRule.java @@ -1,5 +1,6 @@ // Copyright 2022-2023 by zhaoming // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class EndpointRule { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java index c51f789a..23164f66 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineModelConfig { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig.java index d921c033..157ec15f 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineNemoEncDecCtcModelConfig { diff --git 
a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineParaformerModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineParaformerModelConfig.java index 41f2fd97..7e99533a 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineParaformerModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineParaformerModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineParaformerModelConfig { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflinePunctuation.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflinePunctuation.java index 4a3f1d7b..95aa50c4 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflinePunctuation.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflinePunctuation.java @@ -7,7 +7,7 @@ public class OfflinePunctuation { System.loadLibrary("sherpa-onnx-jni"); } - private long ptr = 0; // this is the asr engine ptrss + private long ptr = 0; public OfflinePunctuation(OfflinePunctuationConfig config) { ptr = newFromFile(config); diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java index 002a6428..8a511457 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizer.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineRecognizer { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java index 42c9f07a..94d3debc 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package 
com.k2fsa.sherpa.onnx; public class OfflineRecognizerConfig { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java index fd14b831..82630539 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineRecognizerResult { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineStream.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineStream.java index 6159c837..30c4131c 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineStream.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineStream.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineStream { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTransducerModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTransducerModelConfig.java index efa32890..e232c226 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTransducerModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTransducerModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineTransducerModelConfig { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineWhisperModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineWhisperModelConfig.java index e5ae7649..7f146374 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineWhisperModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineWhisperModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OfflineWhisperModelConfig { 
/**
 * Immutable holder for the path of a zipformer audio tagging model.
 *
 * <p>Obtain instances via {@link #builder()}; the model path defaults to the empty string.
 */
public class OfflineZipformerAudioTaggingModelConfig {
  private final String model;

  private OfflineZipformerAudioTaggingModelConfig(Builder source) {
    model = source.modelPath;
  }

  /** Returns the model path this config was built with. */
  public String getModel() {
    return this.model;
  }

  /** Starts a new builder whose model path is empty. */
  public static Builder builder() {
    return new Builder();
  }

  /** Collects the model path and produces the immutable config. */
  public static class Builder {
    private String modelPath = "";

    /** Stores the model path and returns this builder for chaining. */
    public Builder setModel(String model) {
      modelPath = model;
      return this;
    }

    /** Creates a config from the path currently stored in this builder. */
    public OfflineZipformerAudioTaggingModelConfig build() {
      return new OfflineZipformerAudioTaggingModelConfig(this);
    }
  }
}
com.k2fsa.sherpa.onnx; public class OnlineRecognizer { static { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java index e124088e..af4b76d4 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java @@ -1,5 +1,6 @@ // Copyright 2022-2023 by zhaoming // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OnlineRecognizerConfig { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerResult.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerResult.java index 468e325e..65e15a95 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerResult.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerResult.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OnlineRecognizerResult { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineStream.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineStream.java index 960144b5..21a00826 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineStream.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineStream.java @@ -1,5 +1,6 @@ // Copyright 2022-2023 by zhaoming // Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; public class OnlineStream { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig.java index eac70630..27e2d926 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig.java @@ -1,4 +1,5 @@ // Copyright 2024 Xiaomi Corporation + package 
com.k2fsa.sherpa.onnx; public class OnlineZipformer2CtcModelConfig {