Support adding punctuation to the speech recognition result (#761)

This commit is contained in:
Fangjun Kuang
2024-04-13 12:15:57 +08:00
committed by GitHub
parent 0f4705f775
commit 329fe1aa8b
27 changed files with 867 additions and 17 deletions

41
.github/scripts/test-offline-punctuation.sh vendored Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
set -ex
log() {
  # Timestamped logger (this function is from espnet).
  # Prints: "<YYYY-mm-dd HH:MM:SS> (<file>:<line>:<function>) <message>"
  # where file/line/function describe the call site of log, not log itself.
  # Arguments: all arguments are joined with spaces to form the message;
  #            backslash escapes in the message are interpreted (echo -e).
  local src_name="${BASH_SOURCE[1]##*/}"  # basename of the calling script
  local call_site="${src_name}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${call_site}) $*"
}
# Smoke test for the offline punctuation executable.
#
# Required environment:
#   EXE - name (resolved via PATH) or path of the punctuation executable.
#
# Steps: download the punctuation model archive into the current directory,
# extract it, run $EXE on a few sample sentences, then remove the model.

# Fail early with a clear message instead of trying to run an empty command.
: "${EXE:?EXE must be set to the offline punctuation executable}"

echo "EXE is $EXE"
echo "PATH: $PATH"
# command -v is a shell builtin, so it does not depend on `which` being
# installed; under `set -e` it still aborts the script when EXE is not found.
command -v "$EXE"

repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
tarball="${repo}.tar.bz2"

# Remove downloaded artifacts on every exit path, including failures, so an
# aborted run does not leave the model behind. ${var:?} guards against an
# accidental `rm -rf` with an empty path.
cleanup() {
  rm -rf -- "${repo:?}" "${tarball:?}"
}
trap cleanup EXIT

log "------------------------------------------------------------"
log "Download model "
log "------------------------------------------------------------"

# -f makes curl exit non-zero on HTTP errors instead of saving the error page
# as the tarball, so a bad download fails here rather than inside tar.
curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/${tarball}"
tar xvf "$tarball"
# Drop the archive right after extraction; only the extracted files are used.
rm -- "$tarball"

ls -lh "$repo"

# Sample inputs without punctuation: mixed Chinese/English, Chinese only,
# and English only.
sentences=(
  "这是一个测试你好吗How are you我很好thank you are you ok谢谢你"
  "我们都是木头人不会说话不会动"
  "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
)

for text in "${sentences[@]}"; do
  "$EXE" \
    --debug=1 \
    --ct-transformer="$repo/model.onnx" \
    "$text"
done

View File

@@ -16,6 +16,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -34,6 +35,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -126,6 +128,14 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: build/bin/*
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |

View File

@@ -16,6 +16,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -33,6 +34,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -105,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |

View File

@@ -15,6 +15,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -30,6 +31,7 @@ on:
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -72,6 +74,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-punctuation.exe
.github/scripts/test-offline-punctuation.sh
- name: Test C API
shell: bash
run: |
@@ -82,7 +92,6 @@ jobs:
.github/scripts/test-c-api.sh
- name: Test Audio tagging
shell: bash
run: |

View File

@@ -15,6 +15,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -30,6 +31,7 @@ on:
- '.github/scripts/test-offline-tts.sh'
- '.github/scripts/test-online-ctc.sh'
- '.github/scripts/test-audio-tagging.sh'
- '.github/scripts/test-offline-punctuation.sh'
- 'CMakeLists.txt'
- 'cmake/**'
- 'sherpa-onnx/csrc/*'
@@ -72,6 +74,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx-offline-punctuation.exe
.github/scripts/test-offline-punctuation.sh
- name: Test spoken language identification (C API)
shell: bash
run: |