encoder only trt ep for transducer (#1130)

This commit is contained in:
Manickavela
2024-07-15 12:22:33 +05:30
committed by GitHub
parent c35200dccf
commit 11cfd33b10
4 changed files with 31 additions and 7 deletions

View File

@@ -94,7 +94,6 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
std::to_string(trt_config.trt_timing_cache_enable);
auto trt_dump_subgraphs =
std::to_string(trt_config.trt_dump_subgraphs);
std::vector<TrtPairs> trt_options = {
{"device_id", device_id.c_str()},
{"trt_max_workspace_size", trt_max_workspace_size.c_str()},
@@ -223,6 +222,21 @@ Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config) {
config.provider_config.provider, &config.provider_config);
}
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
                                      const std::string &model_type) {
  // For transducer models only the encoder is run with TensorRT. When the
  // configured provider is "trt" and this session is for the "decoder" or
  // the "joiner", the session options are built for "cuda" instead. Every
  // other combination keeps the provider that is set in the config.
  const bool is_decoder_or_joiner =
      (model_type == "decoder") || (model_type == "joiner");
  const bool use_cuda_instead_of_trt =
      is_decoder_or_joiner && (config.provider_config.provider == "trt");

  if (!use_cuda_instead_of_trt) {
    return GetSessionOptionsImpl(config.num_threads,
                                 config.provider_config.provider,
                                 &config.provider_config);
  }

  // The provider config is still forwarded; only the provider name changes.
  return GetSessionOptionsImpl(config.num_threads, "cuda",
                               &config.provider_config);
}
Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config) {
  // Offline models forward only the thread count and the provider name;
  // no provider config argument is passed on this path.
  const auto &provider_name = config.provider;
  return GetSessionOptionsImpl(config.num_threads, provider_name);
}