Add inverse text normalization for non-streaming ASR (#1017)
This commit is contained in:
81
python-api-examples/inverse-text-normalization-offline-asr.py
Executable file
81
python-api-examples/inverse-text-normalization-offline-asr.py
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
"""
|
||||
This script shows how to use inverse text normalization with non-streaming ASR.
|
||||
|
||||
Usage:
|
||||
|
||||
(1) Download the test model
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
|
||||
|
||||
(2) Download rule fst
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||
|
||||
Please refer to
|
||||
https://github.com/k2-fsa/colab/blob/master/sherpa-onnx/itn_zh_number.ipynb
|
||||
for how itn_zh_number.fst is generated.
|
||||
|
||||
(3) Download test wave
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
|
||||
|
||||
(4) Run this script
|
||||
|
||||
python3 ./python-api-examples/inverse-text-normalization-offline-asr.py
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
import sherpa_onnx
|
||||
import soundfile as sf
|
||||
|
||||
|
||||
def create_recognizer():
|
||||
model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"
|
||||
tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"
|
||||
rule_fsts = "./itn_zh_number.fst"
|
||||
|
||||
if (
|
||||
not Path(model).is_file()
|
||||
or not Path(tokens).is_file()
|
||||
or not Path(rule_fsts).is_file()
|
||||
):
|
||||
raise ValueError(
|
||||
"""Please download model files from
|
||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
"""
|
||||
)
|
||||
return sherpa_onnx.OfflineRecognizer.from_paraformer(
|
||||
paraformer=model,
|
||||
tokens=tokens,
|
||||
debug=True,
|
||||
rule_fsts=rule_fsts,
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
recognizer = create_recognizer()
|
||||
wave_filename = "./itn-zh-number.wav"
|
||||
if not Path(wave_filename).is_file():
|
||||
raise ValueError(
|
||||
"""Please download model files from
|
||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
"""
|
||||
)
|
||||
audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
|
||||
audio = audio[:, 0] # only use the first channel
|
||||
|
||||
stream = recognizer.create_stream()
|
||||
stream.accept_waveform(sample_rate, audio)
|
||||
recognizer.decode_stream(stream)
|
||||
print(wave_filename)
|
||||
print(stream.result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user