fix(ggml-cuda): 修正CUDA编译标志和WARP_SIZE配置
更新CUDA编译标志以使用正确的fast-math和extended-lambda选项；调整WARP_SIZE为64以适配目标硬件；移除-Wmissing-noreturn警告选项；修复cudaStreamWaitEvent调用缺少参数的问题。
This commit is contained in:
@@ -40,12 +40,12 @@
|
||||
#define STRINGIZE_IMPL(...) #__VA_ARGS__
|
||||
#define STRINGIZE(...) STRINGIZE_IMPL(__VA_ARGS__)
|
||||
|
||||
#define WARP_SIZE 32
|
||||
#define WARP_SIZE 64
|
||||
#define CUDART_HMAX 11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed)
|
||||
#define CUDART_HMASK 12000 // CUDA 12.0, min. ver. for half2 -> uint mask comparisons
|
||||
|
||||
#define GGML_CUDA_CC_PASCAL 600
|
||||
#define GGML_CUDA_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
|
||||
#define GGML_CUDA_CC_PASCAL 300
|
||||
#define GGML_CUDA_CC_DP4A 300 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
|
||||
#define GGML_CUDA_CC_VOLTA 700
|
||||
#define GGML_CUDA_CC_TURING 750
|
||||
#define GGML_CUDA_CC_AMPERE 800
|
||||
@@ -350,7 +350,8 @@ static __device__ void no_device_code(
|
||||
printf("%s:%d: ERROR: CUDA kernel %s has no device code compatible with CUDA arch %d. ggml-cuda.cu was compiled for: %s\n",
|
||||
file_name, line, function_name, arch, arch_list);
|
||||
#endif // defined(GGML_USE_HIP)
|
||||
__trap();
|
||||
// __trap();
|
||||
__builtin_trap();
|
||||
|
||||
GGML_UNUSED(no_device_code); // suppress unused function warning
|
||||
|
||||
|
||||
Reference in New Issue
Block a user