Add links to pre-built APKs and pre-trained models to README. (#840)

This commit is contained in:
Fangjun Kuang
2024-05-07 12:28:42 +08:00
committed by GitHub
parent 37a4135dd7
commit d2e86b0415
20 changed files with 1395 additions and 614 deletions

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -ex
log() {
  # Print a timestamped message tagged with the caller's location
  # (file name : line number : function name).
  # This function is from espnet
  local caller_file=${BASH_SOURCE[1]##*/}
  local location="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${location}) $*"
}
# Read the release version out of ./CMakeLists.txt: grep the line(s) that
# mention SHERPA_ONNX_VERSION, keep the second space-separated field, then
# keep the text between its double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building streaming ASR two-pass APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
# Exported so that the per-architecture build scripts below inherit it
# (presumably turns off TTS support in the native build -- confirm there).
export SHERPA_ONNX_ENABLE_TTS=OFF
# Build the native libraries once per Android ABI. The APK loop further down
# copies the resulting *.so files out of ./build-android-<arch>/install/lib/.
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
# All renamed APKs are collected in ./apks
mkdir -p apks
# The template loop that starts here is expanded by the generator script once
# per (first-pass model, second-pass model) pair in model_list.
{% for first, second in model_list %}
pushd ./android/SherpaOnnx2Pass/app/src/main/assets/
# ---- first-pass model: download, unpack, prune ----
model_name1={{ first.model_name }}
# The cleanup snippet substituted below refers to $model_name
model_name=$model_name1
type1={{ first.idx }}
lang1={{ first.lang }}
short_name1={{ first.short_name }}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name1}.tar.bz2
tar xvf ${model_name1}.tar.bz2
{{ first.cmd }}
rm -rf *.tar.bz2
ls -lh $model_name1
# ---- second-pass model: same steps ----
model_name2={{ second.model_name }}
# Re-point $model_name for the second cleanup snippet
model_name=$model_name2
type2={{ second.idx }}
# NOTE(review): lang2 is assigned but never used; only lang1 appears in the
# APK file name below.
lang2={{ second.lang }}
short_name2={{ second.short_name }}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name2}.tar.bz2
tar xvf ${model_name2}.tar.bz2
{{ second.cmd }}
rm -rf *.tar.bz2
ls -lh $model_name2
popd
# Now we are at the project root directory
# Discard modifications to tracked files (e.g. the sed edits made by a previous
# pass of this loop), so the sed patterns below see the checked-in values.
git checkout .
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
# Patch the hardcoded model types in the Kotlin source. The patterns only match
# the literal values 9 and 0; -i.bak leaves a MainActivity.kt.bak backup.
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt
git diff
popd
# Build one APK per Android ABI for the current model pair.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build ASR apk for $arch"
log "------------------------------------------------------------"
# Map the Android ABI name to the suffix used by the build-android-* dirs;
# arm64-v8a and x86 use the same name in both places.
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx2Pass/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnx2Pass
# Replace every 2048 with 9012 in gradle.properties (presumably raising a
# Gradle JVM memory limit -- confirm against the file).
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
# APK name: sherpa-onnx-<version>-<abi>-asr_2pass-<lang1>-<short1>_<short2>.apk
mv android/SherpaOnnx2Pass/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr_2pass-$lang1-${short_name1}_${short_name2}.apk
ls -lh apks
# Remove this ABI's libraries again before the next ABI is processed.
rm -v ./android/SherpaOnnx2Pass/app/src/main/jniLibs/$arch/*.so
done
# Drop both model directories before the next model pair is handled.
rm -rf ./android/SherpaOnnx2Pass/app/src/main/assets/$model_name1
rm -rf ./android/SherpaOnnx2Pass/app/src/main/assets/$model_name2
{% endfor %}
# Restore tracked files to their checked-in state and show what was produced.
git checkout .
ls -lh apks/

150
scripts/apk/build-apk-kws.sh Executable file
View File

@@ -0,0 +1,150 @@
#!/usr/bin/env bash
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -e
log() {
  # Print a timestamped message tagged with the caller's location
  # (file name : line number : function name).
  # This function is from espnet
  local caller_file=${BASH_SOURCE[1]##*/}
  local location="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${location}) $*"
}
# Read the release version out of ./CMakeLists.txt: grep the line(s) that
# mention SHERPA_ONNX_VERSION, keep the second space-separated field, then
# keep the text between its double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building keyword spotting APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
# Exported so that the per-architecture build scripts below inherit it
# (presumably turns off TTS support in the native build -- confirm there).
export SHERPA_ONNX_ENABLE_TTS=OFF
# Build the native libraries once per Android ABI. The APK loops further down
# copy the resulting *.so files out of ./build-android-<arch>/install/lib/.
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
# All renamed APKs are collected in ./apks
mkdir -p apks
# Download the model
# (first pass of the script: the wenetspeech model; APKs are tagged "zh")
repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
# Skip the download when the model directory is already present in the assets.
if [ ! -d ./android/SherpaOnnxKws/app/src/main/assets/$repo ]; then
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
# NOTE(review): nothing is tested here; the "Start testing" message looks
# copy-pasted from a test script.
log "Start testing ${repo_url}"
log "Download pretrained model from $repo_url"
curl -SL -O $repo_url
tar jxvf ${repo}.tar.bz2
rm ${repo}.tar.bz2
# Delete files from the unpacked model before it is moved into the assets
# (presumably they are not needed by the app -- confirm).
pushd $repo
rm configuration.json
rm keywords_raw.txt
rm *.int8.onnx
rm README.md
rm -rfv test_wavs
ls -lh
popd
mv -v $repo ./android/SherpaOnnxKws/app/src/main/assets/
fi
tree ./android/SherpaOnnxKws/app/src/main/assets/
# Build one APK per Android ABI with the model currently in the assets.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
# Map the Android ABI name to the suffix used by the build-android-* dirs;
# arm64-v8a and x86 use the same name in both places.
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxKws/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxKws
# Replace every 2048 with 9012 in gradle.properties (presumably raising a
# Gradle JVM memory limit -- confirm against the file).
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
mv android/SherpaOnnxKws/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-kws-zh-wenetspeech-zipformer.apk
ls -lh apks
# Remove this ABI's libraries again before the next ABI is processed.
rm -v ./android/SherpaOnnxKws/app/src/main/jniLibs/$arch/*.so
done
# Restore tracked files and remove the first model before the second pass.
git checkout .
rm -rf ./android/SherpaOnnxKws/app/src/main/assets/$repo
# English model
# (second pass: same steps with the gigaspeech model; APKs are tagged "en")
repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
if [ ! -d ./android/SherpaOnnxKws/app/src/main/assets/$repo ]; then
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz2
# NOTE(review): same copy-pasted "Start testing" message as above.
log "Start testing ${repo_url}"
log "Download pretrained model from $repo_url"
curl -SL -O $repo_url
tar jxvf ${repo}.tar.bz2
rm ${repo}.tar.bz2
# The deletion list differs from the first model: bpe.model is removed here
# instead of configuration.json.
pushd $repo
rm bpe.model
rm keywords_raw.txt
rm *.int8.onnx
rm README.md
rm -rfv test_wavs
ls -lh
popd
mv -v $repo ./android/SherpaOnnxKws/app/src/main/assets/
fi
tree ./android/SherpaOnnxKws/app/src/main/assets/
pushd android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx
# Change the hardcoded "type = 0" to "type = 1" in the Kotlin source
# (presumably selecting the English model -- confirm in MainActivity.kt).
# -i.bak leaves a MainActivity.kt.bak backup.
sed -i.bak s/"type = 0"/"type = 1"/ ./MainActivity.kt
git diff
popd
# Build one APK per Android ABI with the English model.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
# Same ABI-name mapping as in the first loop.
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxKws/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxKws
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
mv android/SherpaOnnxKws/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-kws-en-gigaspeech-zipformer.apk
ls -lh apks
rm -v ./android/SherpaOnnxKws/app/src/main/jniLibs/$arch/*.so
done
# Restore tracked files and remove the English model from the assets.
git checkout .
rm -rf ./android/SherpaOnnxKws/app/src/main/assets/$repo

View File

@@ -0,0 +1,93 @@
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -ex
log() {
  # Print a timestamped message tagged with the caller's location
  # (file name : line number : function name).
  # This function is from espnet
  local caller_file=${BASH_SOURCE[1]##*/}
  local location="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${location}) $*"
}
# Read the release version out of ./CMakeLists.txt: grep the line(s) that
# mention SHERPA_ONNX_VERSION, keep the second space-separated field, then
# keep the text between its double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building streaming VAD + ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
# Exported so that the per-architecture build scripts below inherit it
# (presumably turns off TTS support in the native build -- confirm there).
export SHERPA_ONNX_ENABLE_TTS=OFF
# Build the native libraries once per Android ABI. The APK loop further down
# copies the resulting *.so files out of ./build-android-<arch>/install/lib/.
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
# All renamed APKs are collected in ./apks
mkdir -p apks
# The template loop that starts here is expanded by the generator script once
# per entry in model_list.
{% for model in model_list %}
pushd ./android/SherpaOnnxVadAsr/app/src/main/assets/
# The cleanup snippet substituted below refers to $model_name
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
short_name={{ model.short_name }}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
{{ model.cmd }}
rm -rf *.tar.bz2
ls -lh $model_name
# The VAD model goes next to the ASR model in the assets. It is downloaded
# again on every pass of the loop; the per-model cleanup below does not
# remove it.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
popd
# Now we are at the project root directory
# Discard modifications to tracked files (e.g. the sed edits made by a previous
# pass of this loop), so the sed pattern below sees the checked-in value.
git checkout .
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
# Patch the hardcoded ASR model type in the Kotlin source. The pattern only
# matches the literal value 0; -i.bak leaves a MainActivity.kt.bak backup.
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt
git diff
popd
# Build one APK per Android ABI for the current model.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build ASR apk for $arch"
log "------------------------------------------------------------"
# Map the Android ABI name to the suffix used by the build-android-* dirs;
# arm64-v8a and x86 use the same name in both places.
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVadAsr/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxVadAsr
# Replace every 2048 with 9012 in gradle.properties (presumably raising a
# Gradle JVM memory limit -- confirm against the file).
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
# APK name: sherpa-onnx-<version>-<abi>-vad_asr-<lang>-<short_name>.apk
mv android/SherpaOnnxVadAsr/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-vad_asr-$lang-$short_name.apk
ls -lh apks
# Remove this ABI's libraries again before the next ABI is processed.
rm -v ./android/SherpaOnnxVadAsr/app/src/main/jniLibs/$arch/*.so
done
# Drop the model directory before the next model is handled.
rm -rf ./android/SherpaOnnxVadAsr/app/src/main/assets/$model_name
{% endfor %}
# Restore tracked files to their checked-in state and show what was produced.
git checkout .
ls -lh apks/

70
scripts/apk/build-apk-vad.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -e
log() {
  # Print a timestamped message tagged with the caller's location
  # (file name : line number : function name).
  # This function is from espnet
  local caller_file=${BASH_SOURCE[1]##*/}
  local location="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${location}) $*"
}
# Read the release version out of ./CMakeLists.txt: grep the line(s) that
# mention SHERPA_ONNX_VERSION, keep the second space-separated field, then
# keep the text between its double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
# Exported so that the per-architecture build scripts below inherit it
# (presumably turns off TTS support in the native build -- confirm there).
export SHERPA_ONNX_ENABLE_TTS=OFF
# Build the native libraries once per Android ABI. The APK loop further down
# copies the resulting *.so files out of ./build-android-<arch>/install/lib/.
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
# All renamed APKs are collected in ./apks
mkdir -p apks
log "https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx"
# Download the model
# NOTE(review): this fetches from the upstream repository's master branch, so
# the file can change or move without notice; consider a pinned release asset.
# "wget -c" resumes a partial download if one exists.
pushd ./android/SherpaOnnxVad/app/src/main/assets/
wget -c https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
popd
# Build one APK per Android ABI.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
# Map the Android ABI name to the suffix used by the build-android-* dirs;
# arm64-v8a and x86 use the same name in both places.
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxVad
# Replace every 2048 with 9012 in gradle.properties (presumably raising a
# Gradle JVM memory limit -- confirm against the file).
sed -i.bak s/2048/9012/g ./gradle.properties
git diff ./gradle.properties
./gradlew assembleRelease
popd
mv android/SherpaOnnxVad/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-silero_vad.apk
ls -lh apks
# Remove this ABI's libraries again before the next ABI is processed.
rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so
done
# Remove the downloaded model from the assets.
# NOTE(review): there is no "git checkout ." here, so the edited
# gradle.properties stays modified after the script finishes.
rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
ls -lh apks/

View File

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
import argparse
from dataclasses import dataclass
from typing import List, Optional
import jinja2
def get_args():
    """Parse and validate the options that shard the work across runners.

    Returns:
        An ``argparse.Namespace`` with two integer attributes: ``total``
        (number of runners, default 1) and ``index`` (index of the current
        runner, default 0).

    Raises:
        SystemExit: with status 2 (after printing a usage message) when the
            options cannot be parsed or ``0 <= index < total`` does not hold.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    args = parser.parse_args()

    # Reject an impossible shard here rather than relying on an ``assert``
    # further down the line: assertions are stripped when Python runs with -O.
    if not 0 <= args.index < args.total:
        parser.error(
            "--index must satisfy 0 <= index < total, "
            f"got index={args.index}, total={args.total}"
        )
    return args
@dataclass
class Model:
    """One pre-trained model that gets bundled into an APK.

    The archive is downloaded from
    https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    """

    # Base name of the release archive (without the .tar.bz2 suffix)
    model_name: str

    # The type of the model, e.g., 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # Language tag, e.g., zh, en, zh_en
    lang: str

    # Short label, e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # Shell snippet used to remove extra files from the model directory
    cmd: str = ""
def get_2nd_models():
    """Return the candidate second-pass models for the two-pass APK.

    Each entry's ``cmd`` is a shell snippet that the build-script template
    pastes in right after the model archive has been unpacked; the snippet
    expects ``$model_name`` to name the unpacked directory.
    """
    # The bodies of the ``cmd`` strings are kept flush-left on purpose: they
    # are emitted verbatim into the generated shell script.
    return [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            idx=2,
            lang="en",
            short_name="whisper_tiny",
            cmd="""
pushd $model_name
rm -v tiny.en-encoder.onnx
rm -v tiny.en-decoder.onnx
rm -rf test_wavs
rm -v *.py
rm -v requirements.txt
rm -v .gitignore
rm -v README.md
ls -lh
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-03-28",
            idx=0,
            lang="zh",
            short_name="paraformer",
            cmd="""
pushd $model_name
rm -v README.md
rm -rfv test_wavs
rm model.onnx
ls -lh
popd
""",
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            idx=4,
            lang="zh",
            short_name="zipformer",
            cmd="""
pushd $model_name
rm -rfv test_wavs
rm -v README.md
mv -v data/lang_char/tokens.txt ./
rm -rfv data/lang_char
mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./
mv -v exp/decoder-epoch-12-avg-4.onnx ./
mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./
rm -rfv exp
ls -lh
popd
""",
        ),
    ]
def get_1st_models():
    """Return the candidate first-pass models for the two-pass APK.

    See also ./generate-asr-apk-script.py

    Each entry's ``cmd`` is a shell snippet that the build-script template
    pastes in right after the model archive has been unpacked; the snippet
    expects ``$model_name`` to name the unpacked directory.
    """
    # The bodies of the ``cmd`` strings are kept flush-left on purpose: they
    # are emitted verbatim into the generated shell script.
    return [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            idx=8,
            lang="bilingual_zh_en",
            short_name="zipformer",
            cmd="""
pushd $model_name
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v encoder-epoch-99-avg-1.onnx
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -v bpe.model
rm -v README.md
rm -v .gitattributes
rm -v *state*
rm -rfv test_wavs
ls -lh
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
            idx=6,
            lang="en",
            short_name="zipformer2",
            cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx
rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
rm -v README.md
rm -v bpe.model
rm -rfv test_wavs
ls -lh
popd
""",
        ),
        Model(
            model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
            idx=3,
            lang="zh",
            short_name="zipformer2",
            cmd="""
pushd $model_name
rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
rm -v data/lang_char/lexicon.txt
rm -v data/lang_char/words.txt
rm -rfv test_wavs
rm -v README.md
ls -lh exp/
ls -lh data/lang_char
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
            idx=7,
            lang="fr",
            short_name="zipformer",
            cmd="""
pushd $model_name
rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx
rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            idx=9,
            lang="zh",
            short_name="small_zipformer",
            cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1.onnx
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            idx=10,
            lang="en",
            short_name="small_zipformer",
            cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1.onnx
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
        ),
    ]
def get_models():
    """Pair first-pass models with second-pass models.

    Returns:
        A list of two-element lists ``[first_model, second_model]``, one per
        hardcoded (first name, second name) combination and in the same order.

    Raises:
        AssertionError: if a name listed in ``combinations`` is not found in
            ``get_1st_models()`` / ``get_2nd_models()`` respectively.
    """
    first = get_1st_models()
    second = get_2nd_models()

    # (first-pass model name, second-pass model name)
    combinations = [
        (
            "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            "sherpa-onnx-paraformer-zh-2023-03-28",
        ),
        (
            "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            "icefall-asr-zipformer-wenetspeech-20230615",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-whisper-tiny.en",
        ),
    ]

    models = []
    for f, s in combinations:
        # Keep only the first match for each name.
        pair = [m for m in first if m.model_name == f][:1]
        assert len(pair) == 1, (f, s, first, second)

        pair.extend([m for m in second if m.model_name == s][:1])
        assert len(pair) == 2, (f, s, first, second)

        models.append(pair)
    return models
def main():
    """Render ./build-apk-asr-2pass.sh for the models owned by this runner.

    The model pairs are split evenly across ``--total`` runners; the leftover
    pairs (when the count is not divisible) go one each to the runners with
    the lowest indexes. The selected pairs are passed to the Jinja2 template
    ``<filename>.in`` as ``model_list`` and the result is written to
    ``<filename>``.

    Raises:
        AssertionError: if ``0 <= index < total`` does not hold.
        ValueError: if there are more runners than model pairs.
    """
    args = get_args()
    index, total = args.index, args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()
    num_models = len(all_model_list)

    # Base share per runner, plus the number of models left over after
    # every runner has received its base share.
    num_per_runner, remaining = divmod(num_models, total)
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner
    print(f"{index}/{total}: {start}-{end}/{num_models}")

    selected = all_model_list[start:end]
    if index < remaining:
        # Leftover models sit after the evenly divided part of the list.
        s = total * num_per_runner + index
        selected.append(all_model_list[s])
        print(f"{s}/{num_models}")

    d = {"model_list": selected}

    for filename in ["./build-apk-asr-2pass.sh"]:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            template = environment.from_string(f.read())

        rendered = template.render(**d)
        with open(filename, "w") as f:
            print(rendered, file=f)


if __name__ == "__main__":
    main()

View File

@@ -57,12 +57,116 @@ def get_models():
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -v bpe.model
rm -v README.md
rm -v .gitattributes
rm -v *state*
rm -rfv test_wavs
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
idx=6,
lang="en",
short_name="zipformer2",
cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx
rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
rm -v README.md
rm -v bpe.model
rm -rfv test_wavs
ls -lh
popd
""",
),
Model(
model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
idx=3,
lang="zh",
short_name="zipformer2",
cmd="""
pushd $model_name
rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
rm -v data/lang_char/lexicon.txt
rm -v data/lang_char/words.txt
rm -rfv test_wavs
rm -v README.md
ls -lh exp/
ls -lh data/lang_char
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
idx=7,
lang="fr",
short_name="zipformer",
cmd="""
pushd $model_name
rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx
rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
idx=9,
lang="zh",
short_name="small_zipformer",
cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1.onnx
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
idx=10,
lang="en",
short_name="small_zipformer",
cmd="""
pushd $model_name
rm -v encoder-epoch-99-avg-1.onnx
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v joiner-epoch-99-avg-1.onnx
rm -v *.sh
rm -rf test_wavs
rm README.md
ls -lh
popd
""",
),

View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
import argparse
from dataclasses import dataclass
from typing import List, Optional
import jinja2
def get_args():
    """Parse and validate the options that shard the work across runners.

    Returns:
        An ``argparse.Namespace`` with two integer attributes: ``total``
        (number of runners, default 1) and ``index`` (index of the current
        runner, default 0).

    Raises:
        SystemExit: with status 2 (after printing a usage message) when the
            options cannot be parsed or ``0 <= index < total`` does not hold.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    args = parser.parse_args()

    # Reject an impossible shard here rather than relying on an ``assert``
    # further down the line: assertions are stripped when Python runs with -O.
    if not 0 <= args.index < args.total:
        parser.error(
            "--index must satisfy 0 <= index < total, "
            f"got index={args.index}, total={args.total}"
        )
    return args
@dataclass
class Model:
    """One pre-trained model that gets bundled into an APK.

    The archive is downloaded from
    https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    """

    # Base name of the release archive (without the .tar.bz2 suffix)
    model_name: str

    # The type of the model, e.g., 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # Language tag, e.g., zh, en, zh_en
    lang: str

    # Short label, e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # Shell snippet used to remove extra files from the model directory
    cmd: str = ""
# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
def get_models():
    """Return the ASR models for which a VAD + ASR APK is built.

    Each entry's ``cmd`` is a shell snippet that the build-script template
    pastes in right after the model archive has been unpacked; the snippet
    expects ``$model_name`` to name the unpacked directory.
    """
    # The bodies of the ``cmd`` strings are kept flush-left on purpose: they
    # are emitted verbatim into the generated shell script.
    return [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            idx=2,
            lang="en",
            short_name="whisper_tiny",
            cmd="""
pushd $model_name
rm -v tiny.en-encoder.onnx
rm -v tiny.en-decoder.onnx
rm -rf test_wavs
rm -v *.py
rm -v requirements.txt
rm -v .gitignore
rm -v README.md
ls -lh
popd
""",
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-03-28",
            idx=0,
            lang="zh",
            short_name="paraformer",
            cmd="""
pushd $model_name
rm -v README.md
rm -rfv test_wavs
rm model.onnx
ls -lh
popd
""",
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            idx=4,
            lang="zh",
            short_name="zipformer",
            cmd="""
pushd $model_name
rm -rfv test_wavs
rm -v README.md
mv -v data/lang_char/tokens.txt ./
rm -rfv data/lang_char
mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./
mv -v exp/decoder-epoch-12-avg-4.onnx ./
mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./
rm -rfv exp
ls -lh
popd
""",
        ),
    ]
def main():
    """Render ./build-apk-vad-asr.sh for the models owned by this runner.

    The models are split evenly across ``--total`` runners; the leftover
    models (when the count is not divisible) go one each to the runners with
    the lowest indexes. The selected models are passed to the Jinja2 template
    ``<filename>.in`` as ``model_list`` and the result is written to
    ``<filename>``.

    Raises:
        AssertionError: if ``0 <= index < total`` does not hold.
        ValueError: if there are more runners than models.
    """
    args = get_args()
    index, total = args.index, args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()
    num_models = len(all_model_list)

    # Base share per runner, plus the number of models left over after
    # every runner has received its base share.
    num_per_runner, remaining = divmod(num_models, total)
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner
    print(f"{index}/{total}: {start}-{end}/{num_models}")

    selected = all_model_list[start:end]
    if index < remaining:
        # Leftover models sit after the evenly divided part of the list.
        s = total * num_per_runner + index
        selected.append(all_model_list[s])
        print(f"{s}/{num_models}")

    d = {"model_list": selected}

    for filename in ["./build-apk-vad-asr.sh"]:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            template = environment.from_string(f.read())

        rendered = template.render(**d)
        with open(filename, "w") as f:
            print(rendered, file=f)


if __name__ == "__main__":
    main()