[Fix] Add speculative_draft_model_revision to server_args (#5255)

Signed-off-by: Devashish Lal <devashish@rivosinc.com>
This commit is contained in:
DevashishLal-CB
2025-09-05 04:45:46 -07:00
committed by GitHub
parent df97b31f37
commit 13705dae06
13 changed files with 68 additions and 45 deletions

View File

@@ -268,7 +268,7 @@ class TestMTP(CustomTestCase):
"deepep",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps",
"2",
@@ -343,7 +343,7 @@ class TestMTPWithTBO(CustomTestCase):
"3",
"--speculative-num-draft-tokens",
"3",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--chunked-prefill-size",
"256",

View File

@@ -1225,7 +1225,7 @@ class Test30(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1272,7 +1272,7 @@ class Test31(CustomTestCase):
"4",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1319,7 +1319,7 @@ class Test32(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1365,7 +1365,7 @@ class Test33(CustomTestCase):
"1",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1414,7 +1414,7 @@ class Test34(CustomTestCase):
"1",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1463,7 +1463,7 @@ class Test35(CustomTestCase):
"1",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1511,7 +1511,7 @@ class Test36(CustomTestCase):
"--enable-dp-lm-head",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1559,7 +1559,7 @@ class Test37(CustomTestCase):
"--enable-dp-lm-head",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1609,7 +1609,7 @@ class Test38(CustomTestCase):
"--enable-dp-lm-head",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1659,7 +1659,7 @@ class Test39(CustomTestCase):
"--enable-dp-lm-head",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1709,7 +1709,7 @@ class Test40(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1762,7 +1762,7 @@ class Test41(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1815,7 +1815,7 @@ class Test42(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1867,7 +1867,7 @@ class Test43(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1922,7 +1922,7 @@ class Test44(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -1977,7 +1977,7 @@ class Test45(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2031,7 +2031,7 @@ class Test46(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2085,7 +2085,7 @@ class Test47(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2141,7 +2141,7 @@ class Test48(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2197,7 +2197,7 @@ class Test49(CustomTestCase):
"32",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2243,7 +2243,7 @@ class Test50(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2292,7 +2292,7 @@ class Test51(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2341,7 +2341,7 @@ class Test52(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2389,7 +2389,7 @@ class Test53(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2440,7 +2440,7 @@ class Test54(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2491,7 +2491,7 @@ class Test55(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2541,7 +2541,7 @@ class Test56(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2591,7 +2591,7 @@ class Test57(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2643,7 +2643,7 @@ class Test58(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",
@@ -2695,7 +2695,7 @@ class Test59(CustomTestCase):
"8",
"--speculative-algo",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps",
"2",

View File

@@ -74,7 +74,7 @@ class TestDPAttentionDP2TP2DeepseekV3MTP(CustomTestCase):
"4",
"--speculative-num-draft-tokens",
"4",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--tp-size",
"2",

View File

@@ -146,7 +146,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
"4",
"--speculative-algorithm",
"EAGLE3",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3,
"--speculative-num-steps",
"3",
@@ -180,7 +180,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest):
"4",
"--speculative-algorithm",
"EAGLE3",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3,
"--speculative-num-steps",
"5",
@@ -212,7 +212,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest):
"4",
"--speculative-algorithm",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps",
"3",
@@ -244,7 +244,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest):
"4",
"--speculative-algorithm",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps",
"5",

View File

@@ -100,7 +100,7 @@ class TestFlashMLAMTP(CustomTestCase):
"1",
"--speculative-algorithm",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"lmsys/sglang-ci-dsv3-test-NextN",
"--speculative-num-steps",
"1",

View File

@@ -121,7 +121,7 @@ class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase):
return DEFAULT_SERVER_ARGS + [
"--speculative-algorithm",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
"--speculative-num-steps",
"3",

View File

@@ -67,7 +67,7 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
"1",
"--speculative-algorithm",
"EAGLE",
-"--speculative-draft",
+"--speculative-draft-model-path",
"sgl-project/sglang-ci-dsv3-channel-int8-test-NextN",
"--speculative-num-steps",
"2",