diff --git a/.github/scripts/test-offline-transducer.sh b/.github/scripts/test-offline-transducer.sh index 9c69bed0..d0544847 100755 --- a/.github/scripts/test-offline-transducer.sh +++ b/.github/scripts/test-offline-transducer.sh @@ -30,7 +30,7 @@ $EXE \ --model-type=nemo_transducer \ ./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/test_wavs/example.wav -rm sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19 +rm -rf sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19 log "------------------------------------------------------------------------" diff --git a/.github/workflows/rknn-linux-aarch64.yaml b/.github/workflows/rknn-linux-aarch64.yaml index 7ed238d7..d413ddef 100644 --- a/.github/workflows/rknn-linux-aarch64.yaml +++ b/.github/workflows/rknn-linux-aarch64.yaml @@ -28,7 +28,7 @@ on: workflow_dispatch: concurrency: - group: aarch64-linux-gnu-shared-${{ github.ref }} + group: rknn-linux-aarch64-${{ github.ref }} cancel-in-progress: true jobs: diff --git a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py index 81d5ae9b..67fb0051 100755 --- a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py +++ b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py @@ -106,6 +106,27 @@ def get_args(): """, ) + parser.add_argument( + "--hr-dict-dir", + type=str, + default="", + help="If not empty, it is the jieba dict directory for homophone replacer", + ) + + parser.add_argument( + "--hr-lexicon", + type=str, + default="", + help="If not empty, it is the lexicon.txt for homophone replacer", + ) + + parser.add_argument( + "--hr-rule-fsts", + type=str, + default="", + help="If not empty, it is the replace.fst for homophone replacer", + ) + parser.add_argument( "--device-name", type=str, @@ -159,6 +180,9 @@ def create_recognizer(args): hotwords_file=args.hotwords_file, hotwords_score=args.hotwords_score, blank_penalty=args.blank_penalty, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) return recognizer diff --git a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py index 3a2ff3b8..e47cf496 100755 --- a/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py +++ b/python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py @@ -114,6 +114,27 @@ def get_args(): """, ) + parser.add_argument( + "--hr-dict-dir", + type=str, + default="", + help="If not empty, it is the jieba dict directory for homophone replacer", + ) + + parser.add_argument( + "--hr-lexicon", + type=str, + default="", + help="If not empty, it is the lexicon.txt for homophone replacer", + ) + + parser.add_argument( + "--hr-rule-fsts", + type=str, + default="", + help="If not empty, it is the replace.fst for homophone replacer", + ) + return parser.parse_args() @@ -142,6 +163,9 @@ def create_recognizer(args): hotwords_file=args.hotwords_file, hotwords_score=args.hotwords_score, blank_penalty=args.blank_penalty, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) return recognizer diff --git a/python-api-examples/speech-recognition-from-microphone.py b/python-api-examples/speech-recognition-from-microphone.py index e4fb1d1d..9e42873f 100755 --- a/python-api-examples/speech-recognition-from-microphone.py +++ b/python-api-examples/speech-recognition-from-microphone.py @@ -123,6 +123,27 @@ def get_args(): """, ) + parser.add_argument( + "--hr-dict-dir", + type=str, + default="", + help="If not empty, it is the jieba dict directory for homophone replacer", + ) + + parser.add_argument( + "--hr-lexicon", + type=str, + default="", + help="If not empty, it is the lexicon.txt for homophone replacer", + ) + + parser.add_argument( + "--hr-rule-fsts", + type=str, + default="", + help="If not empty, it is the replace.fst for homophone replacer", + ) + return parser.parse_args() @@ -148,6 +169,9 @@ def create_recognizer(args): hotwords_file=args.hotwords_file, hotwords_score=args.hotwords_score, blank_penalty=args.blank_penalty, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) return recognizer diff --git a/python-api-examples/speech-recognition-from-url.py b/python-api-examples/speech-recognition-from-url.py index 52c5a25a..b47f0f9d 100755 --- a/python-api-examples/speech-recognition-from-url.py +++ b/python-api-examples/speech-recognition-from-url.py @@ -110,6 +110,26 @@ def get_args(): """, ) + parser.add_argument( + "--hr-dict-dir", + type=str, + default="", + help="If not empty, it is the jieba dict directory for homophone replacer", + ) + + parser.add_argument( + "--hr-lexicon", + type=str, + default="", + help="If not empty, it is the lexicon.txt for homophone replacer", + ) + + parser.add_argument( + "--hr-rule-fsts", + type=str, + default="", + help="If not empty, it is the replace.fst for homophone replacer", + ) return parser.parse_args() @@ -133,6 +153,9 @@ def create_recognizer(args): rule3_min_utterance_length=300, # it essentially disables this rule hotwords_file=args.hotwords_file, hotwords_score=args.hotwords_score, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) return recognizer diff --git a/python-api-examples/vad-with-non-streaming-asr.py b/python-api-examples/vad-with-non-streaming-asr.py index f5bde30c..ee5ae5f8 100755 --- a/python-api-examples/vad-with-non-streaming-asr.py +++ b/python-api-examples/vad-with-non-streaming-asr.py @@ -273,6 +273,27 @@ def get_args(): help="Feature dimension. Must match the one expected by the model", ) + parser.add_argument( + "--hr-dict-dir", + type=str, + default="", + help="If not empty, it is the jieba dict directory for homophone replacer", + ) + + parser.add_argument( + "--hr-lexicon", + type=str, + default="", + help="If not empty, it is the lexicon.txt for homophone replacer", + ) + + parser.add_argument( + "--hr-rule-fsts", + type=str, + default="", + help="If not empty, it is the replace.fst for homophone replacer", + ) + return parser.parse_args() @@ -312,6 +333,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: decoding_method=args.decoding_method, blank_penalty=args.blank_penalty, debug=args.debug, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) elif args.paraformer: assert len(args.sense_voice) == 0, args.sense_voice @@ -334,6 +358,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: feature_dim=args.feature_dim, decoding_method=args.decoding_method, debug=args.debug, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) elif args.sense_voice: assert len(args.whisper_encoder) == 0, args.whisper_encoder @@ -352,6 +379,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: num_threads=args.num_threads, use_itn=True, debug=args.debug, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) elif args.whisper_encoder: assert_file_exists(args.whisper_encoder) @@ -373,6 +403,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: language=args.whisper_language, task=args.whisper_task, tail_paddings=args.whisper_tail_paddings, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) elif args.moonshine_preprocessor: assert_file_exists(args.moonshine_preprocessor) @@ -389,6 +422,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer: num_threads=args.num_threads, decoding_method=args.decoding_method, debug=args.debug, + hr_dict_dir=args.hr_dict_dir, + hr_rule_fsts=args.hr_rule_fsts, + hr_lexicon=args.hr_lexicon, ) else: raise ValueError("Please specify at least one model")