From 1646149a8336741faac914706e85fe7fcd08dfb2 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sun, 9 Feb 2025 23:16:20 +0800 Subject: [PATCH] fix draft cuda graph capture failure (#3431) --- python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py index 41ff5c19e..5a79a9809 100644 --- a/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +++ b/python/sglang/srt/speculative/eagle_draft_cuda_graph_runner.py @@ -85,6 +85,7 @@ class EAGLEDraftCudaGraphRunner: "1. disable cuda graph by --disable-cuda-graph\n" "2. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n" "3. disable torch compile by not using --enable-torch-compile\n" + "4. specify --dtype to the same dtype (e.g. bfloat16)\n" "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n" )