Disable flaky eagle tests (#5753)

This commit is contained in:
Lianmin Zheng
2025-04-25 15:54:39 -07:00
committed by GitHub
parent 5641a09458
commit 21514ff5bd
5 changed files with 7 additions and 47 deletions

View File

@@ -279,9 +279,9 @@ class CudaGraphRunner:
f"Capture cuda graph failed: {e}\n"
"Possible solutions:\n"
"1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
"2. set --cuda-graph-max-bs to a smaller value (e.g., 32)\n"
"2. set --cuda-graph-max-bs to a smaller value (e.g., 16)\n"
"3. disable torch compile by not using --enable-torch-compile\n"
"4. disable cuda graph by --disable-cuda-graph\n"
"4. disable cuda graph by --disable-cuda-graph. (Not recommonded. Huge perf loss)\n"
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
)

View File

@@ -955,12 +955,6 @@ class ModelRunner:
return
if self.server_args.disable_cuda_graph:
logger.warning(
"\n\nCUDA Graph is DISABLED.\n"
"This will cause significant performance degradation.\n"
"CUDA Graph should almost never be disabled in most usage scenarios.\n"
"If you encounter OOM issues, please try setting --mem-fraction-static to a lower value (such as 0.8 or 0.7) instead of disabling CUDA Graph.\n"
)
return
tic = time.time()

View File

@@ -85,9 +85,9 @@ class EAGLEDraftCudaGraphRunner:
f"Capture cuda graph failed: {e}\n"
"Possible solutions:\n"
"1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
"2. disable torch compile by not using --enable-torch-compile\n"
"3. specify --dtype to the same dtype (e.g. bfloat16)\n"
"4. disable cuda graph by --disable-cuda-graph\n"
"2. set --cuda-graph-max-bs to a smaller value (e.g., 16)\n"
"3. disable torch compile by not using --enable-torch-compile\n"
"4. disable cuda graph by --disable-cuda-graph. (Not recommonded. Huge perf loss)\n"
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
)