Reduce model initialization time for online speech recognition (#215)
* Reduce model initialization time for online speech recognition
* Fixed Styling

---------

Co-authored-by: w11wo <wilsowong961@gmail.com>
This commit is contained in:
@@ -22,26 +22,30 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) {
|
||||
|
||||
po->Register("debug", &debug,
|
||||
"true to print model information while loading it.");
|
||||
po->Register("model-type", &model_type,
|
||||
"Specify it to reduce model initialization time. "
|
||||
"Valid values are: conformer, lstm, zipformer, zipformer2. "
|
||||
"All other values lead to loading the model twice.");
|
||||
}
|
||||
|
||||
bool OnlineTransducerModelConfig::Validate() const {
|
||||
if (!FileExists(tokens)) {
|
||||
SHERPA_ONNX_LOGE("%s does not exist", tokens.c_str());
|
||||
SHERPA_ONNX_LOGE("tokens: %s does not exist", tokens.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExists(encoder_filename)) {
|
||||
SHERPA_ONNX_LOGE("%s does not exist", encoder_filename.c_str());
|
||||
SHERPA_ONNX_LOGE("encoder: %s does not exist", encoder_filename.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExists(decoder_filename)) {
|
||||
SHERPA_ONNX_LOGE("%s does not exist", decoder_filename.c_str());
|
||||
SHERPA_ONNX_LOGE("decoder: %s does not exist", decoder_filename.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FileExists(joiner_filename)) {
|
||||
SHERPA_ONNX_LOGE("%s does not exist", joiner_filename.c_str());
|
||||
SHERPA_ONNX_LOGE("joiner: %s does not exist", joiner_filename.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -63,6 +67,7 @@ std::string OnlineTransducerModelConfig::ToString() const {
|
||||
os << "tokens=\"" << tokens << "\", ";
|
||||
os << "num_threads=" << num_threads << ", ";
|
||||
os << "provider=\"" << provider << "\", ";
|
||||
os << "model_type=\"" << model_type << "\", ";
|
||||
os << "debug=" << (debug ? "True" : "False") << ")";
|
||||
|
||||
return os.str();
|
||||
|
||||
@@ -19,19 +19,33 @@ struct OnlineTransducerModelConfig {
|
||||
bool debug = false;
|
||||
std::string provider = "cpu";
|
||||
|
||||
// With the help of this field, we only need to load the model once
|
||||
// instead of twice, which reduces initialization time.
|
||||
//
|
||||
// Valid values:
|
||||
// - conformer
|
||||
// - lstm
|
||||
// - zipformer
|
||||
// - zipformer2
|
||||
//
|
||||
// Any other value, including the default empty string, leads to loading
// the model twice.
|
||||
std::string model_type;
|
||||
|
||||
OnlineTransducerModelConfig() = default;
|
||||
OnlineTransducerModelConfig(const std::string &encoder_filename,
|
||||
const std::string &decoder_filename,
|
||||
const std::string &joiner_filename,
|
||||
const std::string &tokens, int32_t num_threads,
|
||||
bool debug, const std::string &provider)
|
||||
bool debug, const std::string &provider,
|
||||
const std::string &model_type)
|
||||
: encoder_filename(encoder_filename),
|
||||
decoder_filename(decoder_filename),
|
||||
joiner_filename(joiner_filename),
|
||||
tokens(tokens),
|
||||
num_threads(num_threads),
|
||||
debug(debug),
|
||||
provider(provider) {}
|
||||
provider(provider),
|
||||
model_type(model_type) {}
|
||||
|
||||
void Register(ParseOptions *po);
|
||||
bool Validate() const;
|
||||
|
||||
@@ -77,6 +77,22 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
|
||||
|
||||
std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
|
||||
const OnlineTransducerModelConfig &config) {
|
||||
if (!config.model_type.empty()) {
|
||||
const auto &model_type = config.model_type;
|
||||
if (model_type == "conformer") {
|
||||
return std::make_unique<OnlineConformerTransducerModel>(config);
|
||||
} else if (model_type == "lstm") {
|
||||
return std::make_unique<OnlineLstmTransducerModel>(config);
|
||||
} else if (model_type == "zipformer") {
|
||||
return std::make_unique<OnlineZipformerTransducerModel>(config);
|
||||
} else if (model_type == "zipformer2") {
|
||||
return std::make_unique<OnlineZipformer2TransducerModel>(config);
|
||||
} else {
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Invalid model_type: %s. Trying to load the model to get its type",
|
||||
model_type.c_str());
|
||||
}
|
||||
}
|
||||
ModelType model_type = ModelType::kUnkown;
|
||||
|
||||
{
|
||||
@@ -140,6 +156,23 @@ Ort::Value OnlineTransducerModel::BuildDecoderInput(
|
||||
#if __ANDROID_API__ >= 9
|
||||
std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
|
||||
AAssetManager *mgr, const OnlineTransducerModelConfig &config) {
|
||||
if (!config.model_type.empty()) {
|
||||
const auto &model_type = config.model_type;
|
||||
if (model_type == "conformer") {
|
||||
return std::make_unique<OnlineConformerTransducerModel>(mgr, config);
|
||||
} else if (model_type == "lstm") {
|
||||
return std::make_unique<OnlineLstmTransducerModel>(mgr, config);
|
||||
} else if (model_type == "zipformer") {
|
||||
return std::make_unique<OnlineZipformerTransducerModel>(mgr, config);
|
||||
} else if (model_type == "zipformer2") {
|
||||
return std::make_unique<OnlineZipformer2TransducerModel>(mgr, config);
|
||||
} else {
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Invalid model_type: %s. Trying to load the model to get its type",
|
||||
model_type.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
auto buffer = ReadFile(mgr, config.encoder_filename);
|
||||
auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user