decoder for open vocabulary keyword spotting (#505)

* various fixes to ContextGraph to support open vocabulary keywords decoder

* Add keyword spotter runtime

* Add binary

* First version works

* Minor fixes

* update text2token

* default values

* Add jni for kws

* add kws android project

* Minor fixes

* Remove unused interface

* Minor fixes

* Add workflow

* handle extra info in texts

* Minor fixes

* Add more comments

* Fix ci

* fix cpp style

* Add input box in android demo so that users can specify their keywords

* Fix cpp style

* Fix comments

* Minor fixes

* Minor fixes

* minor fixes

* Minor fixes

* Minor fixes

* Add CI

* Fix code style

* cpplint

* Fix comments

* Fix error
This commit is contained in:
Wei Kang
2024-01-20 22:52:41 +08:00
committed by GitHub
parent bf1dd3daf6
commit b6c020901a
77 changed files with 3316 additions and 68 deletions

68
.github/scripts/test-kws.sh vendored Executable file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env bash
#
# Keyword-spotting smoke test: clones pretrained model repositories and runs
# the executable named by $EXE on the test waves shipped with each of them.

# Stop at the first command that exits with a non-zero status.
set -o errexit
log() {
  # Timestamped logger (taken from espnet). Prints the current date/time,
  # then "(file:line:function)" of the caller, then all arguments joined by
  # a single space.
  local caller_file="${BASH_SOURCE[1]##*/}"  # caller's script, directory stripped
  local caller_line="${BASH_LINENO[0]}"      # line on which log was invoked
  local caller_func="${FUNCNAME[1]}"         # function that invoked log
  local stamp_fmt='+%Y-%m-%d %H:%M:%S'

  # echo -e is kept on purpose: backslash escapes in the message are expanded.
  echo -e "$(date "${stamp_fmt}") (${caller_file}:${caller_line}:${caller_func}) $*"
}
echo "EXE is $EXE"
echo "PATH: $PATH"

# EXE must be exported by the caller and name a single executable.
: "${EXE:?EXE must be set to the keyword spotter executable}"

# Print where the executable resolves to; fails the script if it is not found.
command -v "$EXE"

#######################################
# Clone one pretrained keyword-spotting model, run $EXE on the given test
# waves, then remove the checkout.
# Globals:   EXE (read)
# Arguments: $1    - repository name under https://www.modelscope.cn/pkufool/
#            $2... - wave file names located in <repo>/test_wavs/
# Outputs:   progress via log; output of git, ls, pushd/popd and $EXE
# Returns:   exits the script (set -e) as soon as any step fails
#######################################
run_kws_test() {
  local repo=$1
  shift

  local repo_url="https://www.modelscope.cn/pkufool/${repo}.git"
  local model_suffix=epoch-12-avg-2-chunk-16-left-64.onnx

  # Prefix every wave name with its directory inside the checkout.
  local -a wavs=()
  local wav
  for wav in "$@"; do
    wavs+=("$repo/test_wavs/$wav")
  done

  log "Start testing ${repo_url}"
  log "Download pretrained model and test-data from $repo_url"

  # Skip LFS objects during clone, then fetch only the *.onnx model files.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo"
  git lfs pull --include "*.onnx"
  ls -lh -- *.onnx
  popd

  time "$EXE" \
    --tokens="$repo/tokens.txt" \
    --encoder="$repo/encoder-$model_suffix" \
    --decoder="$repo/decoder-$model_suffix" \
    --joiner="$repo/joiner-$model_suffix" \
    --keywords-file="$repo/test_wavs/test_keywords.txt" \
    --max-active-paths=4 \
    --num-threads=4 \
    "${wavs[@]}"

  # ${repo:?} aborts instead of expanding to nothing if repo is ever empty.
  rm -rf -- "${repo:?}"
}

log "------------------------------------------------------------"
log "Run Chinese keyword spotting (Wenetspeech)"
log "------------------------------------------------------------"
run_kws_test sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 \
  3.wav 4.wav 5.wav 6.wav

log "------------------------------------------------------------"
log "Run English keyword spotting (Gigaspeech)"
log "------------------------------------------------------------"
run_kws_test sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01 \
  0.wav 1.wav

67
.github/workflows/apk-kws.yaml vendored Normal file
View File

@@ -0,0 +1,67 @@
# Builds the keyword-spotting Android APKs with ./build-kws-apk.sh, uploads
# them as a workflow artifact and attaches them to a release.
name: apk-kws

on:
  push:
    branches:
      - apk-kws
    tags:
      - '*'

  workflow_dispatch:

# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: apk-kws-${{ github.ref }}
  cancel-in-progress: true

# Write access to repository contents is requested for the release upload step.
permissions:
  contents: write

jobs:
  apk:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh "${ANDROID_NDK_LATEST_HOME}"

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-kws-apk.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/

      # upload-artifact v3 is deprecated and rejected by GitHub-hosted runners;
      # v4 accepts the same `path` input.
      - uses: actions/upload-artifact@v4
        with:
          path: ./apks/*.apk

      # NOTE(review): this step has no `if:` guard, so it also runs for pushes
      # to the apk-kws branch and for manual dispatches - confirm that is
      # intended.
      - name: Release APK
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: apks/*.apk
          overwrite: true

View File

@@ -107,6 +107,14 @@ jobs:
name: release-static
path: build/bin/*
- name: Test transducer kws
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-keyword-spotter
.github/scripts/test-kws.sh
- name: Test online CTC
shell: bash
run: |

View File

@@ -98,6 +98,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test transducer kws
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-keyword-spotter
.github/scripts/test-kws.sh
- name: Test online CTC
shell: bash
run: |
@@ -106,7 +114,6 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test offline TTS
shell: bash
run: |

View File

@@ -62,7 +62,7 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
python3 -m pip install --upgrade pip numpy sentencepiece==0.1.96 soundfile
python3 -m pip install --upgrade pip numpy pypinyin sentencepiece==0.1.96 soundfile
- name: Install sherpa-onnx
shell: bash

View File

@@ -45,7 +45,7 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
python3 -m pip install --upgrade pip numpy sentencepiece
python3 -m pip install --upgrade pip numpy pypinyin sentencepiece
- name: Install sherpa-onnx
shell: bash

View File

@@ -45,7 +45,7 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
python3 -m pip install --upgrade pip numpy sentencepiece
python3 -m pip install --upgrade pip numpy pypinyin sentencepiece
- name: Install sherpa-onnx
shell: bash