Clean up code for speculative attention mode (#10149)

This commit is contained in:
Baizhou Zhang
2025-09-08 17:38:06 -07:00
committed by GitHub
parent 148022fc36
commit 8ad700f735
7 changed files with 14 additions and 35 deletions

View File

@@ -111,27 +111,6 @@ class TestHybridAttnBackendTorchCompile(TestHybridAttnBackendBase):
return DEFAULT_SERVER_ARGS + ["--enable-torch-compile"]
class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase):
speculative_decode = True
# This eagle test uses a very small model, so the accuracy is low.
accuracy_threshold = 0.2
@classmethod
def get_server_args(cls):
return DEFAULT_SERVER_ARGS + [
"--speculative-algorithm",
"EAGLE",
"--speculative-draft-model-path",
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
"--speculative-num-steps",
"3",
"--speculative-eagle-topk",
"2",
"--speculative-num-draft-tokens",
"4",
]
class TestHybridAttnBackendSpeculativeDecodingPrefillBackend(TestHybridAttnBackendBase):
speculative_decode = True
# This eagle test uses a very small model, so the accuracy is low.
@@ -150,7 +129,7 @@ class TestHybridAttnBackendSpeculativeDecodingPrefillBackend(TestHybridAttnBacke
"2",
"--speculative-num-draft-tokens",
"4",
"--speculative-attention-backend",
"--speculative-attention-mode",
"prefill",
]
@@ -173,7 +152,7 @@ class TestHybridAttnBackendSpeculativeDecodingDecodeBackend(TestHybridAttnBacken
"2",
"--speculative-num-draft-tokens",
"4",
"--speculative-attention-backend",
"--speculative-attention-mode",
"decode",
]