Support adding punctuation to the speech recognition result (#761)

This commit is contained in:
Fangjun Kuang
2024-04-13 12:15:57 +08:00
committed by GitHub
parent 0f4705f775
commit 329fe1aa8b
27 changed files with 867 additions and 17 deletions

41
.github/scripts/test-offline-punctuation.sh vendored Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -ex
# Print a timestamped log line to stdout, tagged with the call site.
# (This helper originates from espnet.)
#
# Arguments: $* - the message; all arguments are joined into one string.
# Outputs:   "<YYYY-mm-dd HH:MM:SS> (<file>:<line>:<function>) <message>"
#            where <file> is the basename of the caller's source file,
#            <line> is the line log was called from and <function> is the
#            name of the calling function.
log() {
  # Index 1 of BASH_SOURCE/FUNCNAME is the caller's frame; BASH_LINENO[0] is
  # the line in that frame from which log was invoked.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  # -e: backslash escapes inside the message are interpreted by echo.
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
}
# Smoke test for the punctuation executable: download the punctuation model
# archive, run the executable on a few sample sentences, then clean up.
#
# Required environment:
#   EXE - name (or path) of the executable under test.

# Fail early with a clear message instead of trying to run an empty command.
: "${EXE:?EXE must be set to the punctuation executable}"

echo "EXE is $EXE"
echo "PATH: $PATH"
# Shell builtin replacement for `which`; prints how $EXE resolves and returns
# non-zero (aborting the script under `set -e`) when it cannot be found.
command -v "$EXE"

repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
tarball="${repo}.tar.bz2"

# Remove the extracted model directory (and any leftover archive) on every
# exit path, so a failing step does not leave downloaded files behind.
cleanup() {
  rm -rf -- "${repo:?}" "${tarball:?}"
}
trap cleanup EXIT

log "------------------------------------------------------------"
log "Download model "
log "------------------------------------------------------------"

# -f: treat HTTP errors as failures so an error page is never saved as the
# archive and handed to tar.
curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/${tarball}"
tar xvf "$tarball"
rm -- "$tarball"

ls -lh "$repo"

# Sample inputs without punctuation: mixed Chinese/English, Chinese only and
# English only.
sentences=(
  "这是一个测试你好吗How are you我很好thank you are you ok谢谢你"
  "我们都是木头人不会说话不会动"
  "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
)

for text in "${sentences[@]}"; do
  "$EXE" \
    --debug=1 \
    --ct-transformer="$repo/model.onnx" \
    "$text"
done