[releases/v0.18.0][Build][BugFix] support ascend950 npu-smi info interface changes and make SOC_VERSION actually take effect (#8061)

### What this PR does / why we need it?
Cherry-picked from #8062 

This PR adds support for the Ascend950 NPU by updating the `npu-smi
info` parsing logic to handle its interface changes. It also ensures
that `SOC_VERSION` actually takes effect: when this environment variable
is set, the `get_chip_type` auto-detection is skipped.


### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
CI passed.

Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
linfeng-yuan
2026-04-10 16:44:38 +08:00
committed by GitHub
parent 34386c8896
commit bd9927d5a9

View File

@@ -72,14 +72,31 @@ def get_value_from_lines(lines: list[str], key: str) -> str:
def get_chip_type() -> str:
try:
# Get NPU ID
npu_info_lines = subprocess.check_output(["npu-smi", "info", "-l"]).decode().strip().split("\n")
npu_id = int(get_value_from_lines(npu_info_lines, "NPU ID"))
chip_info_lines = (
subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id), "-c", "0"])
.decode()
.strip()
.split("\n")
# Stage 1: query board info without -c flag
board_info_lines = (
subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id)]).decode().strip().split("\n")
)
# Check if Chip Name exists (Ascend950 includes it directly)
chip_name = get_value_from_lines(board_info_lines, "Chip Name")
# Stage 2: query with -c flag only if Chip Name not found (A2/A3/310P)
if not chip_name:
chip_info_lines = (
subprocess.check_output(["npu-smi", "info", "-t", "board", "-i", str(npu_id), "-c", "0"])
.decode()
.strip()
.split("\n")
)
else:
# Ascend950 already has complete info
chip_info_lines = board_info_lines
# Extract required fields
chip_name = get_value_from_lines(chip_info_lines, "Chip Name")
chip_type = get_value_from_lines(chip_info_lines, "Chip Type")
npu_name = get_value_from_lines(chip_info_lines, "NPU Name")
@@ -113,9 +130,8 @@ def get_chip_type() -> str:
envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_ascend", "envs.py"))
soc_version = get_chip_type()
if not envs.SOC_VERSION:
soc_version = get_chip_type()
if not soc_version:
raise RuntimeError(
"Could not determine chip type automatically via 'npu-smi'. "
@@ -128,9 +144,6 @@ if not envs.SOC_VERSION:
"You can also refer to the SOC_VERSION defaults in Dockerfile*."
)
envs.SOC_VERSION = soc_version
else:
if soc_version and soc_version != envs.SOC_VERSION:
logging.warning(f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}")
def gen_build_info():