@@ -49,6 +49,8 @@ class OfflineRecognizer(object):
|
||||
hotwords_file: str = "",
|
||||
hotwords_score: float = 1.5,
|
||||
blank_penalty: float = 0.0,
|
||||
modeling_unit: str = "cjkchar",
|
||||
bpe_vocab: str = "",
|
||||
debug: bool = False,
|
||||
provider: str = "cpu",
|
||||
model_type: str = "transducer",
|
||||
@@ -91,6 +93,16 @@ class OfflineRecognizer(object):
|
||||
hotwords_file is given with modified_beam_search as decoding method.
|
||||
blank_penalty:
|
||||
The penalty applied on blank symbol during decoding.
|
||||
modeling_unit:
|
||||
The modeling unit of the model. Commonly used units are bpe, cjkchar,
|
||||
cjkchar+bpe, etc. Currently, it is needed only when hotwords are
|
||||
provided, because it is used to encode the hotwords into token sequences.
|
||||
and the modeling unit is bpe or cjkchar+bpe.
|
||||
bpe_vocab:
|
||||
The vocabulary generated by Google's SentencePiece program.
|
||||
It is a file that has two columns: one is the token, the other is
|
||||
the log probability. You can get it from the directory where
|
||||
your bpe model is generated. Only used when hotwords are provided
|
||||
debug:
|
||||
True to show debug messages.
|
||||
provider:
|
||||
@@ -107,6 +119,8 @@ class OfflineRecognizer(object):
|
||||
num_threads=num_threads,
|
||||
debug=debug,
|
||||
provider=provider,
|
||||
modeling_unit=modeling_unit,
|
||||
bpe_vocab=bpe_vocab,
|
||||
model_type=model_type,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user