Add Java and Kotlin API for punctuation models (#818)

This commit is contained in:
Fangjun Kuang
2024-04-26 22:06:48 +08:00
committed by GitHub
parent db25986240
commit 5407f880c0
19 changed files with 515 additions and 0 deletions

View File

@@ -0,0 +1 @@
../sherpa-onnx/kotlin-api/OfflinePunctuation.kt

View File

@@ -197,9 +197,29 @@ function testOfflineAsr() {
java -Djava.library.path=../build/lib -jar $out_filename
}
# Compile the Kotlin punctuation example into a runnable jar and execute it.
# The punctuation model archive is fetched and unpacked first when
# model.onnx is not already present in the current directory tree.
function testPunctuation() {
  local model_name=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
  local archive=$model_name.tar.bz2

  # Download, extract, then discard the archive only when the model is missing.
  if [ ! -f "./$model_name/model.onnx" ]; then
    curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/$archive"
    tar xvf "$archive"
    rm "$archive"
  fi

  # Bundle the test program with the API source and the faked helper sources.
  out_filename=test_punctuation.jar
  kotlinc-jvm -include-runtime -d "$out_filename" \
    ./test_punctuation.kt \
    ./OfflinePunctuation.kt \
    faked-asset-manager.kt \
    faked-log.kt

  ls -lh "$out_filename"

  # java.library.path points at ../build/lib for the run.
  java -Djava.library.path=../build/lib -jar "$out_filename"
}
# Run the API tests one after another; the punctuation test runs last.
testSpeakerEmbeddingExtractor
testOnlineAsr
testTts
testAudioTagging
testSpokenLanguageIdentification
testOfflineAsr
testPunctuation

View File

@@ -0,0 +1,31 @@
package com.k2fsa.sherpa.onnx
/** Program entry point: runs the offline punctuation demo. */
fun main() = testPunctuation()
/**
 * Runs the offline punctuation model over a few sample sentences and prints
 * each input next to its punctuated output.
 *
 * The model is loaded from
 * `./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx`,
 * relative to the working directory, using one thread on the "cpu" provider
 * with debug enabled.
 */
fun testPunctuation() {
    val config = OfflinePunctuationConfig(
        model = OfflinePunctuationModelConfig(
            ctTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx",
            numThreads = 1,
            debug = true,
            provider = "cpu",
        )
    )
    val punct = OfflinePunctuation(config = config)

    // Held in a List rather than an Array so that the final println shows the
    // sentences themselves; printing an Array yields only its JVM identity
    // string (e.g. "[Ljava.lang.String;@1b2c3d").
    val sentences = listOf(
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    )

    println("---")
    for (text in sentences) {
        val out = punct.addPunctuation(text)
        println("Input: $text")
        println("Output: $out")
        println("---")
    }
    println(sentences)
}