feat: support flashinfer mla attention for deepseek v3 (#3550)

2025-02-14 08:50:14 +08:00
parent 368de3661e
commit 70f894b810
12 changed files with 299 additions and 135 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -21,12 +21,13 @@ runtime_common = [
    "hf_transfer", "huggingface_hub", "interegular", "modelscope",
    "orjson", "packaging", "pillow", "prometheus-client>=0.20.0",
    "psutil", "pydantic", "python-multipart", "pyzmq>=25.1.2",
-    "torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar>=0.1.10"
+    "torchao>=0.7.0", "uvicorn", "uvloop", "xgrammar>=0.1.10", "ninja"
 ]
 srt = [
    "sglang[runtime_common]", "cuda-python",
    "sgl-kernel>=0.0.3.post5", "torch", "vllm>=0.6.4.post1,<=0.7.2",
-    "flashinfer_python>=0.2.0.post2", "outlines>=0.0.44,<=0.1.11"
+    "flashinfer_python>=0.2.1.post1",
+    "outlines>=0.0.44,<=0.1.11",
 ]

 # HIP (Heterogeneous-computing Interface for Portability) for AMD