Proper convolution mode for fast GPU processing (#350)

This commit is contained in:
Nickolay V. Shmyrev
2023-10-07 15:24:57 +03:00
committed by GitHub
parent 36017d49c4
commit c12286fe5e

View File

@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
sess_opts.SetIntraOpNumThreads(num_threads);
sess_opts.SetInterOpNumThreads(num_threads);
// Other possible options
// sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
// sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
// sess_opts.EnableProfiling("profile");
switch (p) {
case Provider::kCPU:
break; // nothing to do for the CPU provider
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
// The CUDA provider is available, proceed with setting the options
OrtCUDAProviderOptions options;
options.device_id = 0;
// Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
// set more options as needed
sess_opts.AppendExecutionProvider_CUDA(options);
} else {