Inverse text normalization API of streaming ASR for various programming languages (#1022)

This commit is contained in:
Fangjun Kuang
2024-06-18 13:42:17 +08:00
committed by GitHub
parent 349d957da2
commit 6789c909d2
64 changed files with 849 additions and 55 deletions

View File

@@ -71,6 +71,17 @@ git checkout .
# Enter the Kotlin source directory of the two-pass demo app so that
# MainActivity.kt can be patched for the models selected for this build.
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
# Replace the default model types in place with $type1 / $type2 (both are set
# outside this excerpt). -i.bak leaves a MainActivity.kt.bak backup behind.
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt
{% if first.rule_fsts %}
# Emitted only when first.rule_fsts is set in the template: swap the null
# default of firstRuleFsts for the rule FST name wrapped in double quotes.
# '%' is used as the sed delimiter in this substitution.
rule_fsts={{ first.rule_fsts }}
sed -i.bak s%"firstRuleFsts = null"%"firstRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
{% if second.rule_fsts %}
# Same substitution for the second model's secondRuleFsts default.
rule_fsts={{ second.rule_fsts }}
sed -i.bak s%"secondRuleFsts = null"%"secondRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
# Print the resulting source changes so they show up in the build output.
git diff
popd

View File

@@ -54,6 +54,12 @@ popd
git checkout .
# Enter the Kotlin source directory of the streaming ASR demo app so that
# MainActivity.kt can be patched for the model selected for this build.
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
# Replace the default model type in place with $type (set outside this
# excerpt). -i.bak leaves a MainActivity.kt.bak backup behind.
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
{% if model.rule_fsts %}
# Emitted only when model.rule_fsts is set in the template: swap the null
# default of ruleFsts for the rule FST name wrapped in double quotes.
# '%' is used as the sed delimiter in this substitution.
rule_fsts={{ model.rule_fsts }}
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
# Print the resulting source changes so they show up in the build output.
git diff
popd
@@ -84,6 +90,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
rm -rf ./android/SherpaOnnx/app/src/main/assets/*.fst
{% endfor %}
git checkout .

View File

@@ -56,6 +56,12 @@ popd
git checkout .
# Enter the Kotlin source directory of the VAD + ASR demo app so that
# MainActivity.kt can be patched for the model selected for this build.
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
# Replace the default ASR model type in place with $type (set outside this
# excerpt). -i.bak leaves a MainActivity.kt.bak backup behind.
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt
{% if model.rule_fsts %}
# Emitted only when model.rule_fsts is set in the template: swap the null
# default of asrRuleFsts for the rule FST name wrapped in double quotes.
# '%' is used as the sed delimiter in this substitution.
rule_fsts={{ model.rule_fsts }}
sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
# Print the resulting source changes so they show up in the build output.
git diff
popd

View File

@@ -41,6 +41,7 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
def get_2nd_models():
@@ -70,7 +71,11 @@ def get_2nd_models():
idx=0,
lang="zh",
short_name="paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv README.md
@@ -87,7 +92,11 @@ def get_2nd_models():
idx=4,
lang="zh",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs
@@ -117,7 +126,11 @@ def get_1st_models():
idx=8,
lang="bilingual_zh_en",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv decoder-epoch-99-avg-1.int8.onnx
rm -fv encoder-epoch-99-avg-1.onnx
@@ -160,7 +173,11 @@ def get_1st_models():
idx=3,
lang="zh",
short_name="zipformer2",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
@@ -202,7 +219,11 @@ def get_1st_models():
idx=9,
lang="zh",
short_name="small_zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv encoder-epoch-99-avg-1.onnx
rm -fv decoder-epoch-99-avg-1.int8.onnx

View File

@@ -42,6 +42,8 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
def get_models():
models = [
@@ -50,7 +52,11 @@ def get_models():
idx=8,
lang="bilingual_zh_en",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv decoder-epoch-99-avg-1.int8.onnx
rm -fv encoder-epoch-99-avg-1.onnx
@@ -93,7 +99,11 @@ def get_models():
idx=3,
lang="zh",
short_name="zipformer2",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
@@ -135,7 +145,11 @@ def get_models():
idx=9,
lang="zh",
short_name="small_zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv encoder-epoch-99-avg-1.onnx
rm -fv decoder-epoch-99-avg-1.int8.onnx

View File

@@ -42,6 +42,8 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
def get_models():
@@ -71,7 +73,11 @@ def get_models():
idx=0,
lang="zh",
short_name="paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -v README.md
@@ -88,7 +94,11 @@ def get_models():
idx=4,
lang="zh",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs
@@ -171,7 +181,11 @@ def get_models():
idx=11,
lang="zh",
short_name="telespeech",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs