Inverse text normalization API of streaming ASR for various programming languages (#1022)

This commit is contained in:
Fangjun Kuang
2024-06-18 13:42:17 +08:00
committed by GitHub
parent 349d957da2
commit 6789c909d2
64 changed files with 849 additions and 55 deletions

View File

@@ -71,6 +71,17 @@ git checkout .
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt
{% if first.rule_fsts %}
rule_fsts={{ first.rule_fsts }}
sed -i.bak s%"firstRuleFsts = null"%"firstRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
{% if second.rule_fsts %}
rule_fsts={{ second.rule_fsts }}
sed -i.bak s%"secondRuleFsts = null"%"secondRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
git diff
popd

View File

@@ -54,6 +54,12 @@ popd
git checkout .
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
{% if model.rule_fsts %}
rule_fsts={{ model.rule_fsts }}
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
git diff
popd
@@ -84,6 +90,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
rm -rf ./android/SherpaOnnx/app/src/main/assets/*.fst
{% endfor %}
git checkout .

View File

@@ -56,6 +56,12 @@ popd
git checkout .
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt
{% if model.rule_fsts %}
rule_fsts={{ model.rule_fsts }}
sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
git diff
popd

View File

@@ -41,6 +41,7 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
def get_2nd_models():
@@ -70,7 +71,11 @@ def get_2nd_models():
idx=0,
lang="zh",
short_name="paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv README.md
@@ -87,7 +92,11 @@ def get_2nd_models():
idx=4,
lang="zh",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs
@@ -117,7 +126,11 @@ def get_1st_models():
idx=8,
lang="bilingual_zh_en",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv decoder-epoch-99-avg-1.int8.onnx
rm -fv encoder-epoch-99-avg-1.onnx
@@ -160,7 +173,11 @@ def get_1st_models():
idx=3,
lang="zh",
short_name="zipformer2",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
@@ -202,7 +219,11 @@ def get_1st_models():
idx=9,
lang="zh",
short_name="small_zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv encoder-epoch-99-avg-1.onnx
rm -fv decoder-epoch-99-avg-1.int8.onnx

View File

@@ -42,6 +42,8 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
def get_models():
models = [
@@ -50,7 +52,11 @@ def get_models():
idx=8,
lang="bilingual_zh_en",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv decoder-epoch-99-avg-1.int8.onnx
rm -fv encoder-epoch-99-avg-1.onnx
@@ -93,7 +99,11 @@ def get_models():
idx=3,
lang="zh",
short_name="zipformer2",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
@@ -135,7 +145,11 @@ def get_models():
idx=9,
lang="zh",
short_name="small_zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv encoder-epoch-99-avg-1.onnx
rm -fv decoder-epoch-99-avg-1.int8.onnx

View File

@@ -42,6 +42,8 @@ class Model:
# cmd is used to remove extra files from the model directory
cmd: str = ""
rule_fsts: str = ""
# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
def get_models():
@@ -71,7 +73,11 @@ def get_models():
idx=0,
lang="zh",
short_name="paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -v README.md
@@ -88,7 +94,11 @@ def get_models():
idx=4,
lang="zh",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs
@@ -171,7 +181,11 @@ def get_models():
idx=11,
lang="zh",
short_name="telespeech",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -rfv test_wavs

View File

@@ -26,6 +26,8 @@ namespace SherpaOnnx
HotwordsFile = "";
HotwordsScore = 1.5F;
CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
RuleFsts = "";
RuleFars = "";
}
public FeatureConfig FeatConfig;
public OnlineModelConfig ModelConfig;
@@ -64,5 +66,11 @@ namespace SherpaOnnx
public float HotwordsScore;
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
[MarshalAs(UnmanagedType.LPStr)]
public string RuleFsts;
[MarshalAs(UnmanagedType.LPStr)]
public string RuleFars;
}
}

View File

@@ -0,0 +1 @@
../../../../go-api-examples/streaming-decode-files/run-transducer-itn.sh

View File

@@ -79,8 +79,8 @@ function osx() {
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl
cp -v sherpa_onnx/lib/*.dylib $dst/
@@ -93,8 +93,8 @@ function osx() {
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl
cp -v sherpa_onnx/lib/*.dylib $dst/
@@ -126,7 +126,6 @@ function windows() {
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
cd ..
rm -rf t
@@ -139,7 +138,6 @@ function windows() {
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
cd ..
rm -rf t

View File

@@ -127,7 +127,11 @@ type OnlineRecognizerConfig struct {
Rule1MinTrailingSilence float32
Rule2MinTrailingSilence float32
Rule3MinUtteranceLength float32
HotwordsFile string
HotwordsScore float32
CtcFstDecoderConfig OnlineCtcFstDecoderConfig
RuleFsts string
RuleFars string
}
// It contains the recognition result for an online stream.
@@ -204,6 +208,17 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
c.hotwords_file = C.CString(config.HotwordsFile)
defer C.free(unsafe.Pointer(c.hotwords_file))
c.hotwords_score = C.float(config.HotwordsScore)
c.rule_fsts = C.CString(config.RuleFsts)
defer C.free(unsafe.Pointer(c.rule_fsts))
c.rule_fars = C.CString(config.RuleFars)
defer C.free(unsafe.Pointer(c.rule_fars))
c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)

View File

@@ -189,6 +189,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
rule3MinUtteranceLength);
SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o);
@@ -246,6 +248,14 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
delete[] c.hotwords_file;
}
if (c.rule_fsts) {
delete[] c.rule_fsts;
}
if (c.rule_fars) {
delete[] c.rule_fars;
}
if (c.ctc_fst_decoder_config.graph) {
delete[] c.ctc_fst_decoder_config.graph;
}