change sgl_kernel WARP_SIZE to 64

2025-11-03 10:17:53 +08:00
parent 8fc552638f
commit 75cd34d172
5 changed files with 5 additions and 5 deletions
--- a/sgl-kernel/csrc/quantization/gguf/ggml-common.h
+++ b/sgl-kernel/csrc/quantization/gguf/ggml-common.h
@@ -3,7 +3,7 @@
 // copied from https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-common.h
 #define QK_K 256
 #define K_QUANTS_PER_ITERATION 2
-#define WARP_SIZE_GGUF 32
+#define WARP_SIZE_GGUF 64
 #define K_SCALE_SIZE 12
 #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
 #define CUDA_QUANTIZE_BLOCK_SIZE 256