From 4536f74251f42c89dd3d8007af4741661f6f2253 Mon Sep 17 00:00:00 2001 From: xiezhongtao Date: Tue, 20 Jan 2026 16:16:12 +0800 Subject: [PATCH] =?UTF-8?q?feat(musa):=20=E6=B7=BB=E5=8A=A0=E5=AF=B9MUSA?= =?UTF-8?q?=E8=AE=BE=E5=A4=87=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加musa.txt依赖文件并修改setup.py以支持MUSA设备 包括添加_is_musa检查函数,更新版本信息和构建选项 --- requirements/musa.txt | 9 +++++++++ setup.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 requirements/musa.txt diff --git a/requirements/musa.txt b/requirements/musa.txt new file mode 100644 index 0000000..26fb9b0 --- /dev/null +++ b/requirements/musa.txt @@ -0,0 +1,9 @@ +# Common dependencies +-r common.txt + +numba == 0.61.2 # Required for N-gram speculative decoding + +# Dependencies for MUSA devices +ray>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. +torch==2.7.1 +triton == 3.1.0 # FIXME(woosuk): This is a hack to avoid import error. 
\ No newline at end of file diff --git a/setup.py b/setup.py index 6fcb665..2f65d3b 100644 --- a/setup.py +++ b/setup.py @@ -574,6 +574,9 @@ def _is_cuda() -> bool: has_cuda = torch.version.cuda is not None return VLLM_TARGET_DEVICE == "cuda" and has_cuda and not _is_tpu() +def _is_musa() -> bool: + return VLLM_TARGET_DEVICE == "musa" + def _is_hip() -> bool: return ( @@ -594,7 +597,7 @@ def _is_xpu() -> bool: def _build_custom_ops() -> bool: - return _is_cuda() or _is_hip() or _is_cpu() + return _is_cuda() or _is_hip() or _is_cpu() or _is_musa() def get_rocm_version(): @@ -673,6 +676,8 @@ def get_vllm_version() -> str: rocm_version = get_rocm_version() or torch.version.hip if rocm_version and rocm_version != envs.VLLM_MAIN_CUDA_VERSION: version += f"{sep}rocm{rocm_version.replace('.', '')[:3]}" + elif _is_musa(): + version += f"{sep}musa" elif _is_tpu(): version += f"{sep}tpu" elif _is_cpu(): @@ -726,6 +731,8 @@ def get_requirements() -> list[str]: requirements = _read_requirements("cpu.txt") elif _is_xpu(): requirements = _read_requirements("xpu.txt") + elif _is_musa(): + requirements = _read_requirements("musa.txt") else: raise ValueError("Unsupported platform, please use CUDA, ROCm, or CPU.") return requirements @@ -733,7 +740,7 @@ def get_requirements() -> list[str]: ext_modules = [] -if _is_cuda() or _is_hip(): +if _is_cuda() or _is_hip() or _is_musa(): ext_modules.append(CMakeExtension(name="vllm._moe_C")) ext_modules.append(CMakeExtension(name="vllm.cumem_allocator")) # Optional since this doesn't get built (produce an .so file). This is just @@ -754,6 +761,8 @@ if _is_cuda(): ext_modules.append( CMakeExtension(name="vllm._flashmla_extension_C", optional=True) ) +if _is_musa(): + ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C")) if _build_custom_ops(): ext_modules.append(CMakeExtension(name="vllm._C"))