Proper convolution mode for fast GPU processing (#350)
This commit is contained in:
committed by
GitHub
parent
36017d49c4
commit
c12286fe5e
@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
|
|||||||
sess_opts.SetIntraOpNumThreads(num_threads);
|
sess_opts.SetIntraOpNumThreads(num_threads);
|
||||||
sess_opts.SetInterOpNumThreads(num_threads);
|
sess_opts.SetInterOpNumThreads(num_threads);
|
||||||
|
|
||||||
|
// Other possible options
|
||||||
|
// sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
|
||||||
|
// sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
|
||||||
|
// sess_opts.EnableProfiling("profile");
|
||||||
|
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case Provider::kCPU:
|
case Provider::kCPU:
|
||||||
break; // nothing to do for the CPU provider
|
break; // nothing to do for the CPU provider
|
||||||
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
|
|||||||
// The CUDA provider is available, proceed with setting the options
|
// The CUDA provider is available, proceed with setting the options
|
||||||
OrtCUDAProviderOptions options;
|
OrtCUDAProviderOptions options;
|
||||||
options.device_id = 0;
|
options.device_id = 0;
|
||||||
|
// Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
|
||||||
|
options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
|
||||||
// set more options as needed
|
// set more options as needed
|
||||||
sess_opts.AppendExecutionProvider_CUDA(options);
|
sess_opts.AppendExecutionProvider_CUDA(options);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user