diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 4e6fbdd49..480a415e8 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -284,9 +284,13 @@ class ServerArgs: "Overlap scheduler are disabled because of using " "eagle speculative decoding." ) - # The token generated from the verify step is counted. + # The token generated from the verify step is counted in speculative_num_draft_tokens. # If sepculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded. - # assert self.speculative_num_steps < self.speculative_num_draft_tokens + assert self.speculative_num_steps < self.speculative_num_draft_tokens + assert ( + self.speculative_num_draft_tokens - 1 + <= self.speculative_num_steps * self.speculative_eagle_topk + ) # GGUF if (