@@ -58,6 +58,8 @@ class OnlineRecognizer(object):
|
||||
hotwords_file: str = "",
|
||||
provider: str = "cpu",
|
||||
model_type: str = "",
|
||||
modeling_unit: str = "cjkchar",
|
||||
bpe_vocab: str = "",
|
||||
lm: str = "",
|
||||
lm_scale: float = 0.1,
|
||||
temperature_scale: float = 2.0,
|
||||
@@ -136,6 +138,16 @@ class OnlineRecognizer(object):
|
||||
model_type:
|
||||
Online transducer model type. Valid values are: conformer, lstm,
|
||||
zipformer, zipformer2. All other values lead to loading the model twice.
|
||||
modeling_unit:
|
||||
The modeling unit of the model, commonly used units are bpe, cjkchar,
|
||||
cjkchar+bpe, etc. Currently, it is needed only when hotwords are
|
||||
provided, because we need it to encode the hotwords into token sequences.
|
||||
bpe_vocab:
|
||||
The vocabulary generated by Google's SentencePiece program.
|
||||
It is a file that has two columns: one is the token, the other is
|
||||
the log probability, you can get it from the directory where
|
||||
your bpe model is generated. Only used when hotwords are provided
|
||||
and the modeling unit is bpe or cjkchar+bpe.
|
||||
"""
|
||||
self = cls.__new__(cls)
|
||||
_assert_file_exists(tokens)
|
||||
@@ -157,6 +169,8 @@ class OnlineRecognizer(object):
|
||||
num_threads=num_threads,
|
||||
provider=provider,
|
||||
model_type=model_type,
|
||||
modeling_unit=modeling_unit,
|
||||
bpe_vocab=bpe_vocab,
|
||||
debug=debug,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user