Clean up code for speculative attention mode (#10149)

This commit is contained in:
Baizhou Zhang
2025-09-08 17:38:06 -07:00
committed by GitHub
parent 148022fc36
commit 8ad700f735
7 changed files with 14 additions and 35 deletions

View File

@@ -111,27 +111,6 @@ class TestHybridAttnBackendTorchCompile(TestHybridAttnBackendBase):
return DEFAULT_SERVER_ARGS + ["--enable-torch-compile"]
class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase):
speculative_decode = True
# This eagle test uses a very small model, so the accuracy is low.
accuracy_threshold = 0.2
@classmethod
def get_server_args(cls):
return DEFAULT_SERVER_ARGS + [
"--speculative-algorithm",
"EAGLE",
"--speculative-draft-model-path",
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
"--speculative-num-steps",
"3",
"--speculative-eagle-topk",
"2",
"--speculative-num-draft-tokens",
"4",
]
class TestHybridAttnBackendSpeculativeDecodingPrefillBackend(TestHybridAttnBackendBase):
speculative_decode = True
# This eagle test uses a very small model, so the accuracy is low.
@@ -150,7 +129,7 @@ class TestHybridAttnBackendSpeculativeDecodingPrefillBackend(TestHybridAttnBacke
"2",
"--speculative-num-draft-tokens",
"4",
"--speculative-attention-backend",
"--speculative-attention-mode",
"prefill",
]
@@ -173,7 +152,7 @@ class TestHybridAttnBackendSpeculativeDecodingDecodeBackend(TestHybridAttnBacken
"2",
"--speculative-num-draft-tokens",
"4",
"--speculative-attention-backend",
"--speculative-attention-mode",
"decode",
]