Add Java API for audio tagging (#820)

This commit is contained in:
Fangjun Kuang
2024-04-28 22:26:04 +08:00
committed by GitHub
parent 5407f880c0
commit 88202f05bb
39 changed files with 476 additions and 129 deletions

View File

@@ -0,0 +1,60 @@
// Copyright 2024 Xiaomi Corporation
// This file shows how to use a CED audio tagging model to tag
// input audio files.
import com.k2fsa.sherpa.onnx.*;
/**
 * Example program: tags a fixed set of wave files with a CED audio tagging model.
 *
 * <p>For every input file it prints the file name followed by the top-K audio events
 * (probability and name) to standard output.
 */
public class AudioTaggingCEDFromFile {
  /**
   * Loads the model, tags each test wave, and prints the results.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    String modelDir = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19";
    String model = modelDir + "/model.int8.onnx";
    String labels = modelDir + "/class_labels_indices.csv";
    int topK = 5;

    // Test inputs are test_wavs/1.wav through test_wavs/13.wav inside the model directory.
    int numTestWaves = 13;
    String[] testWaves = new String[numTestWaves];
    for (int i = 0; i < numTestWaves; ++i) {
      testWaves[i] = modelDir + "/test_wavs/" + (i + 1) + ".wav";
    }

    AudioTaggingModelConfig modelConfig =
        AudioTaggingModelConfig.builder().setCED(model).setNumThreads(1).setDebug(true).build();
    AudioTaggingConfig config =
        AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();

    AudioTagging tagger = new AudioTagging(config);
    try {
      System.out.println("------");
      for (String filename : testWaves) {
        WaveReader reader = new WaveReader(filename);
        OfflineStream stream = tagger.createStream();
        AudioEvent[] events;
        try {
          stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
          events = tagger.compute(stream);
        } finally {
          // Release the stream even when feeding or computing fails
          // (release() presumably frees native resources - confirm against the API).
          stream.release();
        }

        System.out.printf("input file: %s\n", filename);
        System.out.printf("Probability\t\tName\n");
        for (AudioEvent e : events) {
          System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
        }
        System.out.println("------");
      }
    } finally {
      // Always release the tagger, including when a file fails part-way through the loop.
      tagger.release();
    }
  }
}

View File

@@ -0,0 +1,68 @@
// Copyright 2024 Xiaomi Corporation
// This file shows how to use a zipformer audio tagging model to tag
// input audio files.
import com.k2fsa.sherpa.onnx.*;
/**
 * Example program: tags a fixed set of wave files with a zipformer audio tagging model.
 *
 * <p>For every input file it prints the file name followed by the top-K audio events
 * (probability and name) to standard output.
 */
public class AudioTaggingZipformerFromFile {
  /**
   * Loads the model, tags each test wave, and prints the results.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    String modelDir = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15";
    String model = modelDir + "/model.int8.onnx";
    String labels = modelDir + "/class_labels_indices.csv";
    int topK = 5;

    // Test inputs are test_wavs/1.wav through test_wavs/13.wav inside the model directory.
    int numTestWaves = 13;
    String[] testWaves = new String[numTestWaves];
    for (int i = 0; i < numTestWaves; ++i) {
      testWaves[i] = modelDir + "/test_wavs/" + (i + 1) + ".wav";
    }

    OfflineZipformerAudioTaggingModelConfig zipformer =
        OfflineZipformerAudioTaggingModelConfig.builder().setModel(model).build();
    AudioTaggingModelConfig modelConfig =
        AudioTaggingModelConfig.builder()
            .setZipformer(zipformer)
            .setNumThreads(1)
            .setDebug(true)
            .build();
    AudioTaggingConfig config =
        AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();

    AudioTagging tagger = new AudioTagging(config);
    try {
      System.out.println("------");
      for (String filename : testWaves) {
        WaveReader reader = new WaveReader(filename);
        OfflineStream stream = tagger.createStream();
        AudioEvent[] events;
        try {
          stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
          events = tagger.compute(stream);
        } finally {
          // Release the stream even when feeding or computing fails
          // (release() presumably frees native resources - confirm against the API).
          stream.release();
        }

        System.out.printf("input file: %s\n", filename);
        System.out.printf("Probability\t\tName\n");
        for (AudioEvent e : events) {
          System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
        }
        System.out.println("------");
      }
    } finally {
      // Always release the tagger, including when a file fails part-way through the loop.
      tagger.release();
    }
  }
}

View File

@@ -36,10 +36,17 @@ This directory contains examples for the JAVA API of sherpa-onnx.
./run-spoken-language-identification-whisper.sh
```
## Add puncutations to text
## Add punctuations to text
The punctuation model supports both English and Chinese.
```bash
./run-add-punctuation-zh-en.sh
```
## Audio tagging
```bash
./run-audio-tagging-zipformer-from-file.sh
./run-audio-tagging-ced-from-file.sh
```

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Runs the CED audio tagging Java example.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build
#   2. build the Java API jar under ../sherpa-onnx/java-api
#   3. download and unpack the CED audio tagging model into the current directory
# Finally it launches AudioTaggingCEDFromFile.java.
# All paths are relative to the current working directory.
set -ex

# Build the JNI library only if neither the .dylib nor the .so variant is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..
  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Fetch the model once; the archive is removed after extraction.
if [ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
fi

# $PWD is quoted so a working directory containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingCEDFromFile.java

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Runs the zipformer audio tagging Java example.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build
#   2. build the Java API jar under ../sherpa-onnx/java-api
#   3. download and unpack the zipformer audio tagging model into the current directory
# Finally it launches AudioTaggingZipformerFromFile.java.
# All paths are relative to the current working directory.
set -ex

# Build the JNI library only if neither the .dylib nor the .so variant is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..
  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it is missing.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Fetch the model once; the archive is removed after extraction.
if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

# $PWD is quoted so a working directory containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingZipformerFromFile.java

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
if [ ! -f ./sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

View File

@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
popd
fi
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
fi
# Note that it needs a multilingual whisper model. So, for example, tiny works while tiny.en does not.
# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then