feat: support FlashInfer MLA attention for DeepSeek V3 (#3550)

This commit is contained in:
Yineng Zhang
2025-02-14 08:50:14 +08:00
committed by GitHub
parent 368de3661e
commit 70f894b810
12 changed files with 299 additions and 135 deletions

View File

@@ -28,6 +28,7 @@ class TestEAGLEEngine(unittest.TestCase):
"speculative_eagle_topk": 8,
"speculative_num_draft_tokens": 64,
"mem_fraction_static": 0.7,
"cuda_graph_max_bs": 32,
}
def setUp(self):
@@ -124,6 +125,8 @@ class TestEAGLEServer(unittest.TestCase):
"64",
"--mem-fraction-static",
"0.7",
"--cuda-graph-max-bs",
"32",
],
)