Add homophone replacer example for Python API. (#2161)
This commit is contained in:
2
.github/scripts/test-offline-transducer.sh
vendored
2
.github/scripts/test-offline-transducer.sh
vendored
@@ -30,7 +30,7 @@ $EXE \
|
|||||||
--model-type=nemo_transducer \
|
--model-type=nemo_transducer \
|
||||||
./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/test_wavs/example.wav
|
./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/test_wavs/example.wav
|
||||||
|
|
||||||
rm sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19
|
rm -rf sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19
|
||||||
|
|
||||||
|
|
||||||
log "------------------------------------------------------------------------"
|
log "------------------------------------------------------------------------"
|
||||||
|
|||||||
2
.github/workflows/rknn-linux-aarch64.yaml
vendored
2
.github/workflows/rknn-linux-aarch64.yaml
vendored
@@ -28,7 +28,7 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: aarch64-linux-gnu-shared-${{ github.ref }}
|
group: rknn-linux-aarch64-${{ github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|||||||
@@ -106,6 +106,27 @@ def get_args():
|
|||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-dict-dir",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the jieba dict directory for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-lexicon",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the lexicon.txt for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-rule-fsts",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the replace.fst for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--device-name",
|
"--device-name",
|
||||||
type=str,
|
type=str,
|
||||||
@@ -159,6 +180,9 @@ def create_recognizer(args):
|
|||||||
hotwords_file=args.hotwords_file,
|
hotwords_file=args.hotwords_file,
|
||||||
hotwords_score=args.hotwords_score,
|
hotwords_score=args.hotwords_score,
|
||||||
blank_penalty=args.blank_penalty,
|
blank_penalty=args.blank_penalty,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
return recognizer
|
return recognizer
|
||||||
|
|
||||||
|
|||||||
@@ -114,6 +114,27 @@ def get_args():
|
|||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-dict-dir",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the jieba dict directory for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-lexicon",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the lexicon.txt for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-rule-fsts",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the replace.fst for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@@ -142,6 +163,9 @@ def create_recognizer(args):
|
|||||||
hotwords_file=args.hotwords_file,
|
hotwords_file=args.hotwords_file,
|
||||||
hotwords_score=args.hotwords_score,
|
hotwords_score=args.hotwords_score,
|
||||||
blank_penalty=args.blank_penalty,
|
blank_penalty=args.blank_penalty,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
return recognizer
|
return recognizer
|
||||||
|
|
||||||
|
|||||||
@@ -123,6 +123,27 @@ def get_args():
|
|||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-dict-dir",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the jieba dict directory for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-lexicon",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the lexicon.txt for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-rule-fsts",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the replace.fst for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@@ -148,6 +169,9 @@ def create_recognizer(args):
|
|||||||
hotwords_file=args.hotwords_file,
|
hotwords_file=args.hotwords_file,
|
||||||
hotwords_score=args.hotwords_score,
|
hotwords_score=args.hotwords_score,
|
||||||
blank_penalty=args.blank_penalty,
|
blank_penalty=args.blank_penalty,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
return recognizer
|
return recognizer
|
||||||
|
|
||||||
|
|||||||
@@ -110,6 +110,26 @@ def get_args():
|
|||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-dict-dir",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the jieba dict directory for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-lexicon",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the lexicon.txt for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-rule-fsts",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the replace.fst for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -133,6 +153,9 @@ def create_recognizer(args):
|
|||||||
rule3_min_utterance_length=300, # it essentially disables this rule
|
rule3_min_utterance_length=300, # it essentially disables this rule
|
||||||
hotwords_file=args.hotwords_file,
|
hotwords_file=args.hotwords_file,
|
||||||
hotwords_score=args.hotwords_score,
|
hotwords_score=args.hotwords_score,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
return recognizer
|
return recognizer
|
||||||
|
|
||||||
|
|||||||
@@ -273,6 +273,27 @@ def get_args():
|
|||||||
help="Feature dimension. Must match the one expected by the model",
|
help="Feature dimension. Must match the one expected by the model",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-dict-dir",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the jieba dict directory for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-lexicon",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the lexicon.txt for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--hr-rule-fsts",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="If not empty, it is the replace.fst for homophone replacer",
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@@ -312,6 +333,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
|||||||
decoding_method=args.decoding_method,
|
decoding_method=args.decoding_method,
|
||||||
blank_penalty=args.blank_penalty,
|
blank_penalty=args.blank_penalty,
|
||||||
debug=args.debug,
|
debug=args.debug,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
elif args.paraformer:
|
elif args.paraformer:
|
||||||
assert len(args.sense_voice) == 0, args.sense_voice
|
assert len(args.sense_voice) == 0, args.sense_voice
|
||||||
@@ -334,6 +358,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
|||||||
feature_dim=args.feature_dim,
|
feature_dim=args.feature_dim,
|
||||||
decoding_method=args.decoding_method,
|
decoding_method=args.decoding_method,
|
||||||
debug=args.debug,
|
debug=args.debug,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
elif args.sense_voice:
|
elif args.sense_voice:
|
||||||
assert len(args.whisper_encoder) == 0, args.whisper_encoder
|
assert len(args.whisper_encoder) == 0, args.whisper_encoder
|
||||||
@@ -352,6 +379,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
|||||||
num_threads=args.num_threads,
|
num_threads=args.num_threads,
|
||||||
use_itn=True,
|
use_itn=True,
|
||||||
debug=args.debug,
|
debug=args.debug,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
elif args.whisper_encoder:
|
elif args.whisper_encoder:
|
||||||
assert_file_exists(args.whisper_encoder)
|
assert_file_exists(args.whisper_encoder)
|
||||||
@@ -373,6 +403,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
|||||||
language=args.whisper_language,
|
language=args.whisper_language,
|
||||||
task=args.whisper_task,
|
task=args.whisper_task,
|
||||||
tail_paddings=args.whisper_tail_paddings,
|
tail_paddings=args.whisper_tail_paddings,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
elif args.moonshine_preprocessor:
|
elif args.moonshine_preprocessor:
|
||||||
assert_file_exists(args.moonshine_preprocessor)
|
assert_file_exists(args.moonshine_preprocessor)
|
||||||
@@ -389,6 +422,9 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
|
|||||||
num_threads=args.num_threads,
|
num_threads=args.num_threads,
|
||||||
decoding_method=args.decoding_method,
|
decoding_method=args.decoding_method,
|
||||||
debug=args.debug,
|
debug=args.debug,
|
||||||
|
hr_dict_dir=args.hr_dict_dir,
|
||||||
|
hr_rule_fsts=args.hr_rule_fsts,
|
||||||
|
hr_lexicon=args.hr_lexicon,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Please specify at least one model")
|
raise ValueError("Please specify at least one model")
|
||||||
|
|||||||
Reference in New Issue
Block a user