Support Qwen3-Next on Ascend NPU (#10379)

This commit is contained in:
Even Zhou
2025-09-13 07:31:37 +08:00
committed by GitHub
parent d5e2a37414
commit 16cd550c85
10 changed files with 79 additions and 26 deletions

View File

@@ -38,6 +38,7 @@ from sglang.srt.utils import (
is_cuda,
is_flashinfer_available,
is_hip,
+is_npu,
is_port_available,
is_remote_url,
is_sm90_supported,
@@ -569,7 +570,7 @@ class ServerArgs:
)
self.disable_cuda_graph = True
-if self.attention_backend == "ascend":
+if is_npu() and self.attention_backend in ["ascend", "hybrid_linear_attn"]:
logger.warning(
"At this moment Ascend attention backend only supports a page_size of 128, change page_size to 128."
)