将分散在各文件中的CUDA/HIP/MUSA硬件相关头文件引用统一到vendors目录下的对应头文件中,提高代码可维护性。移除重复的头文件引用,优化构建配置。
33 lines
950 B
Plaintext
33 lines
950 B
Plaintext
#include "cuda_utils.h"
|
|
#include "vendors/functions.h"
|
|
|
|
int64_t get_device_attribute(int64_t attribute, int64_t device_id) {
|
|
// Return the cached value on subsequent calls
|
|
static int value = [=]() {
|
|
int device = static_cast<int>(device_id);
|
|
if (device < 0) {
|
|
CUDA_CHECK(cudaGetDevice(&device));
|
|
}
|
|
int value;
|
|
CUDA_CHECK(cudaDeviceGetAttribute(
|
|
&value, static_cast<cudaDeviceAttr>(attribute), device));
|
|
return static_cast<int>(value);
|
|
}();
|
|
|
|
return value;
|
|
}
|
|
|
|
int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id) {
|
|
int64_t attribute;
|
|
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
|
|
// cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74
|
|
|
|
#ifdef USE_ROCM
|
|
attribute = hipDeviceAttributeMaxSharedMemoryPerBlock;
|
|
#else
|
|
attribute = cudaDevAttrMaxSharedMemoryPerBlockOptin;
|
|
#endif
|
|
|
|
return get_device_attribute(attribute, device_id);
|
|
}
|