Proper convolution mode for fast GPU processing (#350)
This commit is contained in:
committed by
GitHub
parent
36017d49c4
commit
c12286fe5e
@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
|
||||
sess_opts.SetIntraOpNumThreads(num_threads);
|
||||
sess_opts.SetInterOpNumThreads(num_threads);
|
||||
|
||||
// Other possible options
|
||||
// sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
|
||||
// sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
|
||||
// sess_opts.EnableProfiling("profile");
|
||||
|
||||
switch (p) {
|
||||
case Provider::kCPU:
|
||||
break; // nothing to do for the CPU provider
|
||||
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
|
||||
// The CUDA provider is available, proceed with setting the options
|
||||
OrtCUDAProviderOptions options;
|
||||
options.device_id = 0;
|
||||
// The default, OrtCudnnConvAlgoSearchExhaustive, is extremely slow; use the heuristic search instead
|
||||
options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
|
||||
// Set more options here as needed
|
||||
sess_opts.AppendExecutionProvider_CUDA(options);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user