CUDA: larger SRAM reads for tile FA, AMD FP16 dot (#15927)
* CUDA: larger SRAM reads for tile FA, AMD FP16 dot * fix logic for availability of v_dot2_f32_f16
This commit is contained in:
8
ggml/src/ggml-cuda/vendors/hip.h
vendored
8
ggml/src/ggml-cuda/vendors/hip.h
vendored
@@ -162,6 +162,14 @@
|
||||
#define GCN
|
||||
#endif
|
||||
|
||||
#if defined(__gfx900__) || defined(__gfx906__)
|
||||
#define GCN5
|
||||
#endif
|
||||
|
||||
#if defined(__gfx803__)
|
||||
#define GCN4
|
||||
#endif
|
||||
|
||||
#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__)
|
||||
#define CDNA // For the entire family
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user