Support streaming paraformer (#263)

This commit is contained in:
Fangjun Kuang
2023-08-14 10:32:14 +08:00
committed by GitHub
parent a4bff28e21
commit 6038e2aa62
38 changed files with 1488 additions and 112 deletions

View File

@@ -9,6 +9,7 @@ on:
paths:
- '.github/workflows/linux-gpu.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -22,6 +23,7 @@ on:
paths:
- '.github/workflows/linux-gpu.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -85,6 +87,14 @@ jobs:
file build/bin/sherpa-onnx
readelf -d build/bin/sherpa-onnx
- name: Test online paraformer
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
shell: bash
run: |

View File

@@ -9,6 +9,7 @@ on:
paths:
- '.github/workflows/linux.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -22,6 +23,7 @@ on:
paths:
- '.github/workflows/linux.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -84,6 +86,14 @@ jobs:
file build/bin/sherpa-onnx
readelf -d build/bin/sherpa-onnx
- name: Test online paraformer
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
shell: bash
run: |

View File

@@ -7,6 +7,7 @@ on:
paths:
- '.github/workflows/macos.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -18,6 +19,7 @@ on:
paths:
- '.github/workflows/macos.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -82,6 +84,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test online paraformer
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper
shell: bash
run: |

View File

@@ -58,7 +58,6 @@ jobs:
sherpa-onnx-microphone-offline --help
sherpa-onnx-offline-websocket-server --help
sherpa-onnx-offline-websocket-client --help
sherpa-onnx-online-websocket-server --help
sherpa-onnx-online-websocket-client --help

View File

@@ -84,14 +84,14 @@ jobs:
if: matrix.model_type == 'paraformer'
shell: bash
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
cd sherpa-onnx-paraformer-zh-2023-03-28
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-bilingual-zh-en
cd sherpa-onnx-paraformer-bilingual-zh-en
git lfs pull --include "*.onnx"
cd ..
python3 ./python-api-examples/non_streaming_server.py \
--paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
--tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt &
--paraformer ./sherpa-onnx-paraformer-bilingual-zh-en/model.int8.onnx \
--tokens ./sherpa-onnx-paraformer-bilingual-zh-en/tokens.txt &
echo "sleep 10 seconds to wait the server start"
sleep 10
@@ -101,16 +101,16 @@ jobs:
shell: bash
run: |
python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/0.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/1.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/2.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/8k.wav
python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/0.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/1.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/2.wav \
./sherpa-onnx-paraformer-bilingual-zh-en/test_wavs/8k.wav
- name: Start server for nemo_ctc models
if: matrix.model_type == 'nemo_ctc'

View File

@@ -24,7 +24,7 @@ jobs:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
model_type: ["transducer"]
model_type: ["transducer", "paraformer"]
steps:
- uses: actions/checkout@v2
@@ -71,3 +71,36 @@ jobs:
run: |
python3 ./python-api-examples/online-websocket-client-decode-file.py \
./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav
- name: Start server for paraformer models
if: matrix.model_type == 'paraformer'
shell: bash
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
cd sherpa-onnx-streaming-paraformer-bilingual-zh-en
git lfs pull --include "*.onnx"
cd ..
python3 ./python-api-examples/streaming_server.py \
--tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
--paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
--paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx &
echo "sleep 10 seconds to wait the server start"
sleep 10
- name: Start client for paraformer models
if: matrix.model_type == 'paraformer'
shell: bash
run: |
python3 ./python-api-examples/online-websocket-client-decode-file.py \
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav
python3 ./python-api-examples/online-websocket-client-decode-file.py \
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav
python3 ./python-api-examples/online-websocket-client-decode-file.py \
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav
python3 ./python-api-examples/online-websocket-client-decode-file.py \
./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/3.wav

View File

@@ -9,6 +9,7 @@ on:
paths:
- '.github/workflows/windows-x64-cuda.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -20,6 +21,7 @@ on:
paths:
- '.github/workflows/windows-x64-cuda.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -74,6 +76,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test online paraformer for windows x64
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx.exe
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper for windows x64
shell: bash
run: |

View File

@@ -9,6 +9,7 @@ on:
paths:
- '.github/workflows/windows-x64.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -20,6 +21,7 @@ on:
paths:
- '.github/workflows/windows-x64.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -75,6 +77,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test online paraformer for windows x64
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx.exe
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper for windows x64
shell: bash
run: |

View File

@@ -7,6 +7,7 @@ on:
paths:
- '.github/workflows/windows-x86.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -18,6 +19,7 @@ on:
paths:
- '.github/workflows/windows-x86.yaml'
- '.github/scripts/test-online-transducer.sh'
- '.github/scripts/test-online-paraformer.sh'
- '.github/scripts/test-offline-transducer.sh'
- '.github/scripts/test-offline-ctc.sh'
- 'CMakeLists.txt'
@@ -73,6 +75,14 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
- name: Test online paraformer for windows x86
shell: bash
run: |
export PATH=$PWD/build/bin/Release:$PATH
export EXE=sherpa-onnx.exe
.github/scripts/test-online-paraformer.sh
- name: Test offline Whisper for windows x86
shell: bash
run: |