From 3008db9c1a4c558c1aef6e1904a190f960c0343b Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 5 May 2025 11:23:15 +0800 Subject: [PATCH] [PD] Allow customizing reserved tokens to avoid KV cache waste (#6002) --- python/sglang/srt/disaggregation/decode.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py index 813a5e9cf..a43704cd4 100644 --- a/python/sglang/srt/disaggregation/decode.py +++ b/python/sglang/srt/disaggregation/decode.py @@ -97,7 +97,9 @@ class DecodePreallocQueue: self.tp_size = tp_size self.bootstrap_port = bootstrap_port - self.num_reserved_decode_tokens = 512 + self.num_reserved_decode_tokens = int( + os.environ.get("SGLANG_NUM_RESERVED_DECODE_TOKENS", "512") + ) # Queue for requests pending pre-allocation self.queue: List[DecodeRequest] = []