From 34c8898755be67de5d379cdceb0173f6a2b5265c Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 9 Mar 2025 01:10:43 -0800 Subject: [PATCH] Check eagle server args (#4217) --- python/sglang/srt/server_args.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 4e6fbdd49..480a415e8 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -284,9 +284,13 @@ class ServerArgs: "Overlap scheduler are disabled because of using " "eagle speculative decoding." ) - # The token generated from the verify step is counted. + # The token generated from the verify step is counted in speculative_num_draft_tokens. # If sepculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded. - # assert self.speculative_num_steps < self.speculative_num_draft_tokens + assert self.speculative_num_steps < self.speculative_num_draft_tokens + assert ( + self.speculative_num_draft_tokens - 1 + <= self.speculative_num_steps * self.speculative_eagle_topk + ) # GGUF if (