Add Java and Kotlin API for punctuation models (#818)
This commit is contained in:
40
java-api-examples/AddPunctuation.java
Normal file
40
java-api-examples/AddPunctuation.java
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
// This file shows how to use a punctuation model to add punctuations to text.
|
||||
//
|
||||
// The model supports both English and Chinese.
|
||||
import com.k2fsa.sherpa.onnx.*;
|
||||
|
||||
public class AddPunctuation {
|
||||
public static void main(String[] args) {
|
||||
// please download the model from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
|
||||
String model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
|
||||
OfflinePunctuationModelConfig modelConfig =
|
||||
OfflinePunctuationModelConfig.builder()
|
||||
.setCtTransformer(model)
|
||||
.setNumThreads(1)
|
||||
.setDebug(true)
|
||||
.build();
|
||||
OfflinePunctuationConfig config =
|
||||
OfflinePunctuationConfig.builder().setModel(modelConfig).build();
|
||||
|
||||
OfflinePunctuation punct = new OfflinePunctuation(config);
|
||||
|
||||
String[] sentences =
|
||||
new String[] {
|
||||
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
|
||||
"我们都是木头人不会说话不会动",
|
||||
"The African blogosphere is rapidly expanding bringing more voices online in the form of"
|
||||
+ " commentaries opinions analyses rants and poetry",
|
||||
};
|
||||
|
||||
System.out.println("---");
|
||||
for (String text : sentences) {
|
||||
String out = punct.addPunctuation(text);
|
||||
System.out.printf("Input: %s\n", text);
|
||||
System.out.printf("Output: %s\n", out);
|
||||
System.out.println("---");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -35,3 +35,11 @@ This directory contains examples for the JAVA API of sherpa-onnx.
|
||||
```bash
|
||||
./run-spoken-language-identification-whisper.sh
|
||||
```
|
||||
|
||||
## Add punctuations to text
|
||||
|
||||
The punctuation model supports both English and Chinese.
|
||||
|
||||
```bash
|
||||
./run-add-punctuation-zh-en.sh
|
||||
```
|
||||
|
||||
51
java-api-examples/run-add-punctuation-zh-en.sh
Executable file
51
java-api-examples/run-add-punctuation-zh-en.sh
Executable file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env bash
#
# Runs the AddPunctuation.java example.
#
# Steps, each skipped when its output already exists:
#   1. build the sherpa-onnx JNI shared library into ../build/lib
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download and unpack the zh-en punctuation model into the current directory
#
# All paths are relative to the current directory, so run this script from the
# directory that contains it.

set -ex

# Build the JNI library only if neither the .dylib nor the .so artifact is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# The JNI build above is intentionally not repeated here: a second cmake/make
# run from this directory (without entering ../build) would configure a build
# tree in the wrong place and never produce ../build/lib.

# Fetch the punctuation model on first use; the archive is removed after unpacking.
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

# Launch the example directly from source. The library path is quoted so that a
# checkout located under a directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AddPunctuation.java
|
||||
Reference in New Issue
Block a user