change sgl_kernel WARP_SIZE to 64

This commit is contained in:
maxiao1
2025-11-03 10:17:53 +08:00
parent 8fc552638f
commit 75cd34d172
5 changed files with 5 additions and 5 deletions

View File

@@ -3,7 +3,7 @@
// copied from https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-common.h
#define QK_K 256
#define K_QUANTS_PER_ITERATION 2
-#define WARP_SIZE_GGUF 32
+#define WARP_SIZE_GGUF 64
#define K_SCALE_SIZE 12
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
#define CUDA_QUANTIZE_BLOCK_SIZE 256