diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 480a415e8..4e6fbdd49 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -284,13 +284,9 @@ class ServerArgs: "Overlap scheduler are disabled because of using " "eagle speculative decoding." ) - # The token generated from the verify step is counted in speculative_num_draft_tokens. + # The token generated from the verify step is counted. # If sepculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded. - assert self.speculative_num_steps < self.speculative_num_draft_tokens - assert ( - self.speculative_num_draft_tokens - 1 - <= self.speculative_num_steps * self.speculative_eagle_topk - ) + # assert self.speculative_num_steps < self.speculative_num_draft_tokens # GGUF if (