From b89d37cb11d0bfef42b34ab86af841d4c1e0a59d Mon Sep 17 00:00:00 2001 From: Baron Liu <66629224+lbh2001@users.noreply.github.com> Date: Sat, 2 Aug 2025 07:02:53 +0800 Subject: [PATCH] [bugfix] Add 'disaggregation_mode' parameter to warmup function when compile deep_gemm manually (#8618) --- python/sglang/compile_deep_gemm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/sglang/compile_deep_gemm.py b/python/sglang/compile_deep_gemm.py index 1a17fad89..e59036f7b 100644 --- a/python/sglang/compile_deep_gemm.py +++ b/python/sglang/compile_deep_gemm.py @@ -17,6 +17,7 @@ import time import requests +from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST from sglang.srt.entrypoints.http_server import launch_server from sglang.srt.managers.io_struct import GenerateReqInput from sglang.srt.managers.tokenizer_manager import TokenizerManager @@ -52,7 +53,9 @@ class CompileArgs: @warmup("compile-deep-gemm") -async def warm_up_compile(tokenizer_manager: TokenizerManager): +async def warm_up_compile( + disaggregation_mode: str, tokenizer_manager: TokenizerManager +): print("\nGenerate warm up request for compiling DeepGEMM...\n") generate_req_input = GenerateReqInput( input_ids=[0, 1, 2, 3], @@ -62,6 +65,10 @@ async def warm_up_compile(tokenizer_manager: TokenizerManager): "ignore_eos": True, }, ) + if disaggregation_mode != "null": + generate_req_input.bootstrap_room = 0 + generate_req_input.bootstrap_host = FAKE_BOOTSTRAP_HOST + await tokenizer_manager.generate_request(generate_req_input, None).__anext__()