@@ -32,6 +32,19 @@ void OnlineModelConfig::Register(ParseOptions *po) {
|
||||
po->Register("provider", &provider,
|
||||
"Specify a provider to use: cpu, cuda, coreml");
|
||||
|
||||
po->Register("modeling-unit", &modeling_unit,
|
||||
"The modeling unit of the model, commonly used units are bpe, "
|
||||
"cjkchar, cjkchar+bpe, etc. Currently, it is needed only when "
|
||||
"hotwords are provided, we need it to encode the hotwords into "
|
||||
"token sequence.");
|
||||
|
||||
po->Register("bpe-vocab", &bpe_vocab,
|
||||
"The vocabulary generated by google's sentencepiece program. "
|
||||
"It is a file has two columns, one is the token, the other is "
|
||||
"the log probability, you can get it from the directory where "
|
||||
"your bpe model is generated. Only used when hotwords provided "
|
||||
"and the modeling unit is bpe or cjkchar+bpe");
|
||||
|
||||
po->Register("model-type", &model_type,
|
||||
"Specify it to reduce model initialization time. "
|
||||
"Valid values are: conformer, lstm, zipformer, zipformer2, "
|
||||
@@ -50,6 +63,14 @@ bool OnlineModelConfig::Validate() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!modeling_unit.empty() &&
|
||||
(modeling_unit == "bpe" || modeling_unit == "cjkchar+bpe")) {
|
||||
if (!FileExists(bpe_vocab)) {
|
||||
SHERPA_ONNX_LOGE("bpe_vocab: %s does not exist", bpe_vocab.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!paraformer.encoder.empty()) {
|
||||
return paraformer.Validate();
|
||||
}
|
||||
@@ -83,7 +104,9 @@ std::string OnlineModelConfig::ToString() const {
|
||||
os << "warm_up=" << warm_up << ", ";
|
||||
os << "debug=" << (debug ? "True" : "False") << ", ";
|
||||
os << "provider=\"" << provider << "\", ";
|
||||
os << "model_type=\"" << model_type << "\")";
|
||||
os << "model_type=\"" << model_type << "\", ";
|
||||
os << "modeling_unit=\"" << modeling_unit << "\", ";
|
||||
os << "bpe_vocab=\"" << bpe_vocab << "\")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user