Add Java API for audio tagging (#820)
This commit is contained in:
11
.github/workflows/run-java-test.yaml
vendored
11
.github/workflows/run-java-test.yaml
vendored
@@ -106,6 +106,17 @@ jobs:
|
||||
make -j4
|
||||
ls -lh lib
|
||||
|
||||
- name: Run java test (audio tagging)
|
||||
shell: bash
|
||||
run: |
|
||||
cd ./java-api-examples
|
||||
./run-audio-tagging-zipformer-from-file.sh
|
||||
# Delete model files to save space
|
||||
rm -rf sherpa-onnx-zipformer-*
|
||||
|
||||
./run-audio-tagging-ced-from-file.sh
|
||||
rm -rf sherpa-onnx-ced-*
|
||||
|
||||
- name: Run java test (add punctuations)
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -101,3 +101,4 @@ sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
|
||||
*.tar.gz
|
||||
*.tar.bz2
|
||||
*.zip
|
||||
sherpa-onnx-ced-*
|
||||
|
||||
60
java-api-examples/AudioTaggingCEDFromFile.java
Normal file
60
java-api-examples/AudioTaggingCEDFromFile.java
Normal file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
// This file shows how to use a CED audio tagging model to tag
|
||||
// input audio files.
|
||||
import com.k2fsa.sherpa.onnx.*;
|
||||
|
||||
public class AudioTaggingCEDFromFile {
|
||||
public static void main(String[] args) {
|
||||
// please download the model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
||||
String model = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx";
|
||||
String labels = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv";
|
||||
int topK = 5;
|
||||
|
||||
String[] testWaves =
|
||||
new String[] {
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/2.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/3.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/4.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/5.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/7.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/8.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/9.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/10.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/11.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/12.wav",
|
||||
"./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/13.wav",
|
||||
};
|
||||
|
||||
AudioTaggingModelConfig modelConfig =
|
||||
AudioTaggingModelConfig.builder().setCED(model).setNumThreads(1).setDebug(true).build();
|
||||
|
||||
AudioTaggingConfig config =
|
||||
AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();
|
||||
|
||||
AudioTagging tagger = new AudioTagging(config);
|
||||
System.out.println("------");
|
||||
for (String filename : testWaves) {
|
||||
WaveReader reader = new WaveReader(filename);
|
||||
|
||||
OfflineStream stream = tagger.createStream();
|
||||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
|
||||
|
||||
AudioEvent[] events = tagger.compute(stream);
|
||||
|
||||
stream.release();
|
||||
|
||||
System.out.printf("input file: %s\n", filename);
|
||||
System.out.printf("Probability\t\tName\n");
|
||||
for (AudioEvent e : events) {
|
||||
System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
|
||||
}
|
||||
System.out.println("------");
|
||||
}
|
||||
|
||||
tagger.release();
|
||||
}
|
||||
}
|
||||
68
java-api-examples/AudioTaggingZipformerFromFile.java
Normal file
68
java-api-examples/AudioTaggingZipformerFromFile.java
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
// This file shows how to use a zipformer audio tagging model to tag
|
||||
// input audio files.
|
||||
import com.k2fsa.sherpa.onnx.*;
|
||||
|
||||
public class AudioTaggingZipformerFromFile {
|
||||
public static void main(String[] args) {
|
||||
// please download the model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
||||
String model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx";
|
||||
String labels =
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv";
|
||||
int topK = 5;
|
||||
|
||||
String[] testWaves =
|
||||
new String[] {
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/2.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/4.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/5.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/6.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/7.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/8.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/9.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/10.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/11.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/12.wav",
|
||||
"./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/13.wav",
|
||||
};
|
||||
|
||||
OfflineZipformerAudioTaggingModelConfig zipformer =
|
||||
OfflineZipformerAudioTaggingModelConfig.builder().setModel(model).build();
|
||||
|
||||
AudioTaggingModelConfig modelConfig =
|
||||
AudioTaggingModelConfig.builder()
|
||||
.setZipformer(zipformer)
|
||||
.setNumThreads(1)
|
||||
.setDebug(true)
|
||||
.build();
|
||||
|
||||
AudioTaggingConfig config =
|
||||
AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();
|
||||
|
||||
AudioTagging tagger = new AudioTagging(config);
|
||||
System.out.println("------");
|
||||
for (String filename : testWaves) {
|
||||
WaveReader reader = new WaveReader(filename);
|
||||
|
||||
OfflineStream stream = tagger.createStream();
|
||||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
|
||||
|
||||
AudioEvent[] events = tagger.compute(stream);
|
||||
|
||||
stream.release();
|
||||
|
||||
System.out.printf("input file: %s\n", filename);
|
||||
System.out.printf("Probability\t\tName\n");
|
||||
for (AudioEvent e : events) {
|
||||
System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
|
||||
}
|
||||
System.out.println("------");
|
||||
}
|
||||
|
||||
tagger.release();
|
||||
}
|
||||
}
|
||||
@@ -36,10 +36,17 @@ This directory contains examples for the JAVA API of sherpa-onnx.
|
||||
./run-spoken-language-identification-whisper.sh
|
||||
```
|
||||
|
||||
## Add puncutations to text
|
||||
## Add punctuations to text
|
||||
|
||||
The punctuation model supports both English and Chinese.
|
||||
|
||||
```bash
|
||||
./run-add-punctuation-zh-en.sh
|
||||
```
|
||||
|
||||
## Audio tagging
|
||||
|
||||
```bash
|
||||
./run-audio-tagging-zipformer-from-file.sh
|
||||
./run-audio-tagging-ced-from-file.sh
|
||||
```
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
|
||||
37
java-api-examples/run-audio-tagging-ced-from-file.sh
Executable file
37
java-api-examples/run-audio-tagging-ced-from-file.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
#
# Builds (if missing) the JNI shared library and the Java API jar, downloads
# the CED audio tagging model (if missing), then runs
# AudioTaggingCEDFromFile.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

set -ex

# Build the native JNI library unless a .dylib or .so is already present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model on first use; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split
# into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingCEDFromFile.java
|
||||
37
java-api-examples/run-audio-tagging-zipformer-from-file.sh
Executable file
37
java-api-examples/run-audio-tagging-zipformer-from-file.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
#
# Builds (if missing) the JNI shared library and the Java API jar, downloads
# the zipformer audio tagging model (if missing), then runs
# AudioTaggingZipformerFromFile.java.
#
# All paths are relative, so run this script from the java-api-examples
# directory.

set -ex

# Build the native JNI library unless a .dylib or .so is already present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar unless it already exists.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download and unpack the model on first use; the archive is removed afterwards.
if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split
# into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingZipformerFromFile.java
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
if [ ! -f ./sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
|
||||
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
# please visit
|
||||
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||
# to download more models
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
# please visit
|
||||
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||
# to download more models
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
# please visit
|
||||
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||
# to download more models
|
||||
|
||||
@@ -25,20 +25,6 @@ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
popd
|
||||
fi
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
fi
|
||||
|
||||
# Note that it needs a multilingual whisper model. so, for example, tiny works while tiny.en does not work
|
||||
# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
||||
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# Copyright 2024 Xiaomi Corporation
|
||||
|
||||
# all .class and .jar files are put inside out_dir
|
||||
out_dir := build
|
||||
@@ -44,6 +45,12 @@ java_files += OfflinePunctuationModelConfig.java
|
||||
java_files += OfflinePunctuationConfig.java
|
||||
java_files += OfflinePunctuation.java
|
||||
|
||||
java_files += OfflineZipformerAudioTaggingModelConfig.java
|
||||
java_files += AudioTaggingModelConfig.java
|
||||
java_files += AudioTaggingConfig.java
|
||||
java_files += AudioEvent.java
|
||||
java_files += AudioTagging.java
|
||||
|
||||
class_files := $(java_files:%.java=%.class)
|
||||
|
||||
java_files := $(addprefix src/$(package_dir)/,$(java_files))
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Immutable value object describing one event reported by the audio tagger:
 * a name, an integer index and a float score exposed as {@code prob}.
 */
public class AudioEvent {
  private final String name;
  // NOTE(review): presumably the row index of this event in the labels file — confirm.
  private final int index;
  private final float prob;

  /**
   * @param name name of the event
   * @param index integer index associated with the event
   * @param prob score of the event
   */
  public AudioEvent(String name, int index, float prob) {
    this.name = name;
    this.index = index;
    this.prob = prob;
  }

  /** @return the event name passed to the constructor */
  public String getName() {
    return name;
  }

  /** @return the event index passed to the constructor */
  public int getIndex() {
    return index;
  }

  /** @return the event score passed to the constructor */
  public float getProb() {
    return prob;
  }

  /**
   * Formats the event as {@code AudioEvent(name=..., index=..., prob=...)} with the
   * score rounded to three decimals. The trailing newline is kept from the original
   * implementation so existing output stays unchanged apart from the corrected
   * class name.
   */
  @Override
  public String toString() {
    return String.format("AudioEvent(name=%s, index=%d, prob=%.3f)\n", name, index, prob);
  }
}
|
||||
@@ -0,0 +1,62 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class AudioTagging {
|
||||
static {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0;
|
||||
|
||||
public AudioTagging(AudioTaggingConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
}
|
||||
|
||||
public OfflineStream createStream() {
|
||||
long p = createStream(ptr);
|
||||
return new OfflineStream(p);
|
||||
}
|
||||
|
||||
public AudioEvent[] compute(OfflineStream stream) {
|
||||
return compute(stream, -1);
|
||||
|
||||
}
|
||||
|
||||
public AudioEvent[] compute(OfflineStream stream, int topK) {
|
||||
Object[] arr = compute(ptr, stream.getPtr(), topK);
|
||||
|
||||
AudioEvent[] events = new AudioEvent[arr.length];
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
Object[] obj = (Object[]) arr[i];
|
||||
String name = (String) obj[0];
|
||||
int index = (int) obj[1];
|
||||
float prob = (float) obj[2];
|
||||
events[i] = new AudioEvent(name, index, prob);
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void finalize() throws Throwable {
|
||||
release();
|
||||
}
|
||||
|
||||
// You'd better call it manually if it is not used anymore
|
||||
public void release() {
|
||||
if (this.ptr == 0) {
|
||||
return;
|
||||
}
|
||||
delete(this.ptr);
|
||||
this.ptr = 0;
|
||||
}
|
||||
|
||||
private native void delete(long ptr);
|
||||
|
||||
private native long newFromFile(AudioTaggingConfig config);
|
||||
|
||||
private native long createStream(long ptr);
|
||||
|
||||
private native Object[] compute(long ptr, long streamPtr, int topK);
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class AudioTaggingConfig {
|
||||
private final AudioTaggingModelConfig model;
|
||||
private final String labels;
|
||||
private final int topK;
|
||||
|
||||
private AudioTaggingConfig(Builder builder) {
|
||||
this.model = builder.model;
|
||||
this.labels = builder.labels;
|
||||
this.topK = builder.topK;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new AudioTaggingConfig.Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private AudioTaggingModelConfig model = AudioTaggingModelConfig.builder().build();
|
||||
private String labels = "";
|
||||
private int topK = 5;
|
||||
|
||||
public AudioTaggingConfig build() {
|
||||
return new AudioTaggingConfig(this);
|
||||
}
|
||||
|
||||
public Builder setModel(AudioTaggingModelConfig model) {
|
||||
this.model = model;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setLabels(String labels) {
|
||||
this.labels = labels;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setTopK(int topK) {
|
||||
this.topK = topK;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class AudioTaggingModelConfig {
|
||||
private final OfflineZipformerAudioTaggingModelConfig zipformer;
|
||||
private final String ced;
|
||||
private final int numThreads;
|
||||
private final boolean debug;
|
||||
private final String provider;
|
||||
|
||||
private AudioTaggingModelConfig(Builder builder) {
|
||||
this.zipformer = builder.zipformer;
|
||||
this.ced = builder.ced;
|
||||
this.numThreads = builder.numThreads;
|
||||
this.debug = builder.debug;
|
||||
this.provider = builder.provider;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private OfflineZipformerAudioTaggingModelConfig zipformer = OfflineZipformerAudioTaggingModelConfig.builder().build();
|
||||
private String ced = "";
|
||||
private int numThreads = 1;
|
||||
private boolean debug = true;
|
||||
private String provider = "cpu";
|
||||
|
||||
public AudioTaggingModelConfig build() {
|
||||
return new AudioTaggingModelConfig(this);
|
||||
}
|
||||
|
||||
public Builder setZipformer(OfflineZipformerAudioTaggingModelConfig zipformer) {
|
||||
this.zipformer = zipformer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setCED(String ced) {
|
||||
this.ced = ced;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setNumThreads(int numThreads) {
|
||||
this.numThreads = numThreads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setDebug(boolean debug) {
|
||||
this.debug = debug;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setProvider(String provider) {
|
||||
this.provider = provider;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
// Copyright 2022-2023 by zhaoming
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class EndpointRule {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineModelConfig {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineNemoEncDecCtcModelConfig {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineParaformerModelConfig {
|
||||
|
||||
@@ -7,7 +7,7 @@ public class OfflinePunctuation {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
private long ptr = 0;
|
||||
|
||||
public OfflinePunctuation(OfflinePunctuationConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineRecognizer {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineRecognizerConfig {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineRecognizerResult {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineStream {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineTransducerModelConfig {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineWhisperModelConfig {
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Immutable configuration for a zipformer audio tagging model; it carries only
 * the model path.
 *
 * <p>Instances are created through {@link #builder()}.
 */
public class OfflineZipformerAudioTaggingModelConfig {
  private final String model;

  private OfflineZipformerAudioTaggingModelConfig(Builder source) {
    this.model = source.model;
  }

  /** @return the model path held by this configuration */
  public String getModel() {
    return model;
  }

  /** @return a builder whose model path defaults to the empty string */
  public static Builder builder() {
    return new Builder();
  }

  /** Fluent builder; the setter returns the builder itself for chaining. */
  public static class Builder {
    private String model = "";

    /** Sets the model path. */
    public Builder setModel(String model) {
      this.model = model;
      return this;
    }

    /** @return a new configuration holding the builder's current model path */
    public OfflineZipformerAudioTaggingModelConfig build() {
      return new OfflineZipformerAudioTaggingModelConfig(this);
    }
  }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineCtcFstDecoderConfig {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Copyright 2022-2023 by zhaoming
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineRecognizer {
|
||||
static {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// Copyright 2022-2023 by zhaoming
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineRecognizerConfig {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineRecognizerResult {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
// Copyright 2022-2023 by zhaoming
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineStream {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OnlineZipformer2CtcModelConfig {
|
||||
|
||||
Reference in New Issue
Block a user