Check eagle server args (#4217)

This commit is contained in:
Ying Sheng
2025-03-09 01:10:43 -08:00
committed by GitHub
parent 0dd6cda288
commit 34c8898755

View File

@@ -284,9 +284,13 @@ class ServerArgs:
"Overlap scheduler are disabled because of using "
"eagle speculative decoding."
)
# The token generated from the verify step is counted.
# The token generated from the verify step is counted in speculative_num_draft_tokens.
# If speculative_num_steps >= speculative_num_draft_tokens, the additional tokens will definitely be discarded.
# assert self.speculative_num_steps < self.speculative_num_draft_tokens
assert self.speculative_num_steps < self.speculative_num_draft_tokens
assert (
self.speculative_num_draft_tokens - 1
<= self.speculative_num_steps * self.speculative_eagle_topk
)
# GGUF
if (