Clean up server_args, triton cache manager (#8332)

This commit is contained in:
Lianmin Zheng
2025-07-25 14:14:51 -07:00
committed by GitHub
parent f8260f2539
commit ed2e313eb6
12 changed files with 128 additions and 204 deletions

View File

@@ -101,7 +101,7 @@ class TestDeepseekMTP(CustomTestCase):
 "--max-running-requests",
 "512",
 "--speculative-algorithm",
-"NEXTN",
+"EAGLE",
 "--speculative-num-steps",
 "1",
 "--speculative-eagle-topk",

View File

@@ -261,7 +261,7 @@ class TestMTP(CustomTestCase):
 "--enable-dp-lm-head",
 "--enable-deepep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
 "--speculative-num-steps",
@@ -329,7 +329,7 @@ class TestMTPWithTBO(CustomTestCase):
 "--enable-deepep-moe",
 "--trust-remote-code",
 "--speculative-algorithm",
-"NEXTN",
+"EAGLE",
 "--speculative-num-steps",
 "2",
 "--speculative-eagle-topk",

View File

@@ -1224,7 +1224,7 @@ class Test30(CustomTestCase):
 "--tp",
 "8",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1271,7 +1271,7 @@ class Test31(CustomTestCase):
 "--dp",
 "4",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1318,7 +1318,7 @@ class Test32(CustomTestCase):
 "--dp",
 "8",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1364,7 +1364,7 @@ class Test33(CustomTestCase):
 "--moe-dense-tp-size",
 "1",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1413,7 +1413,7 @@ class Test34(CustomTestCase):
 "--moe-dense-tp-size",
 "1",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1462,7 +1462,7 @@ class Test35(CustomTestCase):
 "--moe-dense-tp-size",
 "1",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1510,7 +1510,7 @@ class Test36(CustomTestCase):
 "4",
 "--enable-dp-lm-head",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1558,7 +1558,7 @@ class Test37(CustomTestCase):
 "8",
 "--enable-dp-lm-head",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1608,7 +1608,7 @@ class Test38(CustomTestCase):
 "1",
 "--enable-dp-lm-head",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1658,7 +1658,7 @@ class Test39(CustomTestCase):
 "1",
 "--enable-dp-lm-head",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1709,7 +1709,7 @@ class Test40(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1763,7 +1763,7 @@ class Test41(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1817,7 +1817,7 @@ class Test42(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1870,7 +1870,7 @@ class Test43(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1926,7 +1926,7 @@ class Test44(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -1982,7 +1982,7 @@ class Test45(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2037,7 +2037,7 @@ class Test46(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2092,7 +2092,7 @@ class Test47(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2149,7 +2149,7 @@ class Test48(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2206,7 +2206,7 @@ class Test49(CustomTestCase):
 "--max-running-requests",
 "32",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2251,7 +2251,7 @@ class Test50(CustomTestCase):
 "8",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2299,7 +2299,7 @@ class Test51(CustomTestCase):
 "4",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2347,7 +2347,7 @@ class Test52(CustomTestCase):
 "8",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2394,7 +2394,7 @@ class Test53(CustomTestCase):
 "1",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2444,7 +2444,7 @@ class Test54(CustomTestCase):
 "1",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2494,7 +2494,7 @@ class Test55(CustomTestCase):
 "1",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2543,7 +2543,7 @@ class Test56(CustomTestCase):
 "--enable-dp-lm-head",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2592,7 +2592,7 @@ class Test57(CustomTestCase):
 "--enable-dp-lm-head",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2643,7 +2643,7 @@ class Test58(CustomTestCase):
 "--enable-dp-lm-head",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",
@@ -2694,7 +2694,7 @@ class Test59(CustomTestCase):
 "--enable-dp-lm-head",
 "--enable-ep-moe",
 "--speculative-algo",
-"NEXTN",
+"EAGLE",
 "--speculative-draft",
 "lmsys/DeepSeek-V3-0324-NextN",
 "--speculative-num-steps",