[fix] fix potential bumpy throughput with deepgemm (#5722)

This commit is contained in:
JieXin Liang
2025-04-27 09:12:48 +08:00
committed by GitHub
parent dfb322642f
commit eebfdb9459
2 changed files with 17 additions and 10 deletions

View File

@@ -27,7 +27,7 @@ from sglang.srt.warmup import warmup
multiprocessing.set_start_method("spawn", force=True)
# Reduce warning
os.environ["SGL_IN_DEEP_GEMM_PRE_COMPILE_STAGE"] = "1"
os.environ["SGL_IN_DEEPGEMM_PRECOMPILE_STAGE"] = "1"
# Force enable deep gemm
os.environ["SGL_ENABLE_JIT_DEEPGEMM"] = "1"
# Force-enable MHA chunked KV for DeepSeek V3 to avoid missing the kv_b_proj DeepGEMM case