From 4536f74251f42c89dd3d8007af4741661f6f2253 Mon Sep 17 00:00:00 2001 From: xiezhongtao Date: Tue, 20 Jan 2026 16:16:12 +0800 Subject: [PATCH] =?UTF-8?q?feat(musa):=20=E6=B7=BB=E5=8A=A0=E5=AF=B9MUSA?= =?UTF-8?q?=E8=AE=BE=E5=A4=87=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加musa.txt依赖文件并修改setup.py以支持MUSA设备 包括添加_is_musa检查函数,更新版本信息和构建选项 --- requirements/musa.txt | 9 +++++++++ setup.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 requirements/musa.txt diff --git a/requirements/musa.txt b/requirements/musa.txt new file mode 100644 index 0000000..26fb9b0 --- /dev/null +++ b/requirements/musa.txt @@ -0,0 +1,9 @@ +# Common dependencies +-r common.txt + +numba == 0.61.2 # Required for N-gram speculative decoding + +# Dependencies for MUSA devices +ray>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. +torch==2.7.1 +triton == 3.1.0 # FIXME(woosuk): This is a hack to avoid import error. 
\ No newline at end of file diff --git a/setup.py b/setup.py index 6fcb665..2f65d3b 100644 --- a/setup.py +++ b/setup.py @@ -574,6 +574,9 @@ def _is_cuda() -> bool: has_cuda = torch.version.cuda is not None return VLLM_TARGET_DEVICE == "cuda" and has_cuda and not _is_tpu() +def _is_musa() -> bool: + return VLLM_TARGET_DEVICE == "musa" + def _is_hip() -> bool: return ( @@ -594,7 +597,7 @@ def _is_xpu() -> bool: def _build_custom_ops() -> bool: - return _is_cuda() or _is_hip() or _is_cpu() + return _is_cuda() or _is_hip() or _is_cpu() or _is_musa() def get_rocm_version(): @@ -673,6 +676,8 @@ def get_vllm_version() -> str: rocm_version = get_rocm_version() or torch.version.hip if rocm_version and rocm_version != envs.VLLM_MAIN_CUDA_VERSION: version += f"{sep}rocm{rocm_version.replace('.', '')[:3]}" + elif _is_musa(): + version += f"{sep}musa" elif _is_tpu(): version += f"{sep}tpu" elif _is_cpu(): @@ -726,6 +731,8 @@ def get_requirements() -> list[str]: requirements = _read_requirements("cpu.txt") elif _is_xpu(): requirements = _read_requirements("xpu.txt") + elif _is_musa(): + requirements = _read_requirements("musa.txt") else: raise ValueError("Unsupported platform, please use CUDA, ROCm, or CPU.") return requirements @@ -733,7 +740,7 @@ def get_requirements() -> list[str]: ext_modules = [] -if _is_cuda() or _is_hip(): +if _is_cuda() or _is_hip() or _is_musa(): ext_modules.append(CMakeExtension(name="vllm._moe_C")) ext_modules.append(CMakeExtension(name="vllm.cumem_allocator")) # Optional since this doesn't get built (produce an .so file). This is just @@ -754,6 +761,8 @@ if _is_cuda(): ext_modules.append( CMakeExtension(name="vllm._flashmla_extension_C", optional=True) ) +if _is_musa(): + ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C")) if _build_custom_ops(): ext_modules.append(CMakeExtension(name="vllm._C"))