Support adding punctuation to the speech recognition result (#761)
This commit is contained in:
41
.github/scripts/test-offline-punctuation.sh
vendored
Executable file
41
.github/scripts/test-offline-punctuation.sh
vendored
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
# Print a timestamped log line tagged with the caller's location.
#
# Output format (written to stdout):
#   YYYY-mm-dd HH:MM:SS (<caller file>:<caller line>:<caller function>) <message>
#
# Arguments: all arguments are joined with spaces ("$*") to form the message.
#            Because `echo -e` is used, backslash escapes in the message
#            (e.g. \t, \n) are interpreted.
log() {
  # This function is from espnet
  # BASH_SOURCE[1] is the file that called log(); keep only its basename.
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||||
|
||||
# Smoke test for the offline punctuation executable.
#
# Required environment:
#   EXE   name of the punctuation executable; it is looked up through PATH.
#
# Steps: download the punctuation model archive, unpack it, run the
# executable on a few sample sentences, then remove the unpacked model.

# Abort with a clear message instead of running with an empty command name.
: "${EXE:?EXE must be set to the offline punctuation executable}"

echo "EXE is $EXE"
echo "PATH: $PATH"

# Show where the executable resolves to; a non-zero status here stops the
# script because of the `set -e` at the top of the file.
command -v "$EXE"

log "------------------------------------------------------------"
log "Download model "
log "------------------------------------------------------------"

repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
archive=$repo.tar.bz2

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the archive name.
curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/$archive"
tar xvf "$archive"
rm -- "$archive"
ls -lh "$repo"

# Unpunctuated sample inputs: mixed Chinese/English, Chinese only, English only.
texts=(
  "这是一个测试你好吗How are you我很好thank you are you ok谢谢你"
  "我们都是木头人不会说话不会动"
  "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
)

for text in "${texts[@]}"; do
  "$EXE" \
    --debug=1 \
    --ct-transformer="$repo/model.onnx" \
    "$text"
done

# ${repo:?} guards against an accidental `rm -rf` with an empty path.
rm -rf -- "${repo:?}"
|
||||
10
.github/workflows/linux.yaml
vendored
10
.github/workflows/linux.yaml
vendored
@@ -16,6 +16,7 @@ on:
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -34,6 +35,7 @@ on:
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -126,6 +128,14 @@ jobs:
|
||||
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
|
||||
path: build/bin/*
|
||||
|
||||
- name: Test offline punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin:$PATH
|
||||
export EXE=sherpa-onnx-offline-punctuation
|
||||
|
||||
.github/scripts/test-offline-punctuation.sh
|
||||
|
||||
- name: Test C API
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
10
.github/workflows/macos.yaml
vendored
10
.github/workflows/macos.yaml
vendored
@@ -16,6 +16,7 @@ on:
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -33,6 +34,7 @@ on:
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -105,6 +107,14 @@ jobs:
|
||||
otool -L build/bin/sherpa-onnx
|
||||
otool -l build/bin/sherpa-onnx
|
||||
|
||||
- name: Test offline punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin:$PATH
|
||||
export EXE=sherpa-onnx-offline-punctuation
|
||||
|
||||
.github/scripts/test-offline-punctuation.sh
|
||||
|
||||
- name: Test C API
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
11
.github/workflows/windows-x64.yaml
vendored
11
.github/workflows/windows-x64.yaml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -30,6 +31,7 @@ on:
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -72,6 +74,14 @@ jobs:
|
||||
|
||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||
|
||||
- name: Test offline punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin/Release:$PATH
|
||||
export EXE=sherpa-onnx-offline-punctuation.exe
|
||||
|
||||
.github/scripts/test-offline-punctuation.sh
|
||||
|
||||
- name: Test C API
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -82,7 +92,6 @@ jobs:
|
||||
|
||||
.github/scripts/test-c-api.sh
|
||||
|
||||
|
||||
- name: Test Audio tagging
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
10
.github/workflows/windows-x86.yaml
vendored
10
.github/workflows/windows-x86.yaml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -30,6 +31,7 @@ on:
|
||||
- '.github/scripts/test-offline-tts.sh'
|
||||
- '.github/scripts/test-online-ctc.sh'
|
||||
- '.github/scripts/test-audio-tagging.sh'
|
||||
- '.github/scripts/test-offline-punctuation.sh'
|
||||
- 'CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'sherpa-onnx/csrc/*'
|
||||
@@ -72,6 +74,14 @@ jobs:
|
||||
|
||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||
|
||||
- name: Test offline punctuation
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH=$PWD/build/bin/Release:$PATH
|
||||
export EXE=sherpa-onnx-offline-punctuation.exe
|
||||
|
||||
.github/scripts/test-offline-punctuation.sh
|
||||
|
||||
- name: Test spoken language identification (C API)
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user