Add Kotlin and Java API for online punctuation models (#1936)

This commit is contained in:
Fangjun Kuang
2025-02-27 16:52:36 +08:00
committed by GitHub
parent 815ebac8f9
commit f5dfcf8d2f
16 changed files with 474 additions and 13 deletions

View File

@@ -0,0 +1 @@
../sherpa-onnx/kotlin-api/OnlinePunctuation.kt

View File

@@ -302,16 +302,16 @@ function testInverseTextNormalizationOnlineAsr() {
java -Djava.library.path=../build/lib -jar $out_filename
}
function testPunctuation() {
function testOfflinePunctuation() {
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi
out_filename=test_punctuation.jar
out_filename=test_offline_punctuation.jar
kotlinc-jvm -include-runtime -d $out_filename \
./test_punctuation.kt \
./test_offline_punctuation.kt \
./OfflinePunctuation.kt \
faked-asset-manager.kt \
faked-log.kt
@@ -321,6 +321,25 @@ function testPunctuation() {
java -Djava.library.path=../build/lib -jar $out_filename
}
# Build and run the Kotlin example for the online punctuation model.
#
# Steps:
#   1. Download and unpack the model archive unless model.int8.onnx is
#      already present in the current directory tree.
#   2. Compile the example together with the faked Android helpers into a
#      self-contained jar (Kotlin runtime included).
#   3. Run the jar with native libraries looked up in ../build/lib.
function testOnlinePunctuation() {
  # Skip the download when the model file already exists.
  if ! [ -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
    tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
    rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  fi

  out_filename=test_online_punctuation.jar

  # Kotlin sources that make up the example, in the order passed to the compiler.
  local sources=(
    ./test_online_punctuation.kt
    ./OnlinePunctuation.kt
    faked-asset-manager.kt
    faked-log.kt
  )

  kotlinc-jvm -include-runtime -d "$out_filename" "${sources[@]}"

  ls -lh "$out_filename"

  java -Djava.library.path=../build/lib -jar "$out_filename"
}
function testOfflineSpeakerDiarization() {
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
@@ -359,6 +378,7 @@ testTts
testAudioTagging
testSpokenLanguageIdentification
testOfflineAsr
testPunctuation
testOfflinePunctuation
testOnlinePunctuation
testInverseTextNormalizationOfflineAsr
testInverseTextNormalizationOnlineAsr

View File

@@ -0,0 +1,30 @@
package com.k2fsa.sherpa.onnx
/** Program entry point: runs the online punctuation example. */
fun main() = testPunctuation()
// https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
/**
 * Feeds a few unpunctuated English sentences through [OnlinePunctuation] and
 * prints every input next to the text returned by `addPunctuation`.
 *
 * The model and BPE vocabulary are read from the
 * `./sherpa-onnx-online-punct-en-2024-08-06/` directory, resolved relative to
 * the current working directory.
 */
fun testPunctuation() {
    val modelConfig = OnlinePunctuationModelConfig(
        cnnBilstm = "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx",
        bpeVocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab",
        numThreads = 1,
        debug = true,
        provider = "cpu",
    )
    val punct = OnlinePunctuation(config = OnlinePunctuationConfig(model = modelConfig))

    // Printed once before the first result and once after every result.
    val separator = "---"

    val samples = listOf(
        "how are you doing fantastic thank you what is about you",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    )

    println(separator)
    samples.forEach { text ->
        val punctuated = punct.addPunctuation(text)
        println("Input: $text")
        println("Output: $punctuated")
        println(separator)
    }
}