[Misc] Add extra checking to torchair_graph_config. (#1939)

### What this PR does / why we need it?

Cherry-pick of #1675 to main.
This PR adds validation checking to torchair_graph_config for better
reliability.

Co-authored-by: whx-sjtu <2952154980@qq.com>

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?


- vLLM version: v0.10.0
- vLLM main:
2836dd73f1

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
22dimensions
2025-08-01 09:24:11 +08:00
committed by GitHub
parent 2284289880
commit 8cf97d8310
5 changed files with 100 additions and 6 deletions

View File

@@ -54,17 +54,16 @@ Multi-node:
--master-port=13345
"""
import os
from time import sleep
import contextlib
import gc
import os
from time import sleep
import torch
from vllm import LLM, SamplingParams
from vllm.utils import get_open_port
from vllm.distributed.parallel_state import ( # noqa E402
destroy_distributed_environment, destroy_model_parallel)
from vllm.utils import get_open_port
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

View File

@@ -236,3 +236,71 @@ class TestAscendConfig(TestBase):
for model_type, expected_output in test_cases:
self.assertEqual(_check_torchair_supported(model_type),
expected_output)
@_clean_up_ascend_config
def test_ascend_config_load_error(self):
    """Invalid torchair_graph_config combinations must be rejected."""
    test_vllm_config = VllmConfig()

    def reload_with(graph_config):
        # Apply the given torchair_graph_config and re-initialize the
        # ascend config so its validation runs.
        test_vllm_config.additional_config = {
            "torchair_graph_config": graph_config,
            "refresh": True
        }
        init_ascend_config(test_vllm_config)

    # graph_batch_sizes should be list.
    with self.assertRaises(TypeError):
        reload_with({"graph_batch_sizes": "fake_size"})

    # None of these options may be turned on while torchair graph mode
    # itself is disabled.
    torchair_only_options = (
        "use_cached_graph",
        "graph_batch_sizes_init",
        "enable_multistream_mla",
        "enable_multistream_moe",
        "enable_kv_nz",
    )
    for option in torchair_only_options:
        with self.assertRaises(RuntimeError):
            reload_with({"enabled": False, option: True})

View File

@@ -76,6 +76,31 @@ class TorchairGraphConfig:
raise ValueError(
"graph_batch_sizes_init is only valid when graph_batch_sizes is empty"
)
if not self.enabled:
if self.use_cached_graph:
raise RuntimeError(
"use_cached_graph is valid only when Torchair graph mode is enabled"
)
if self.graph_batch_sizes:
raise RuntimeError(
"graph_batch_sizes is valid only when Torchair graph mode is enabled"
)
if self.graph_batch_sizes_init:
raise RuntimeError(
"graph_batch_sizes_init is valid only when Torchair graph mode is enabled"
)
if self.enable_multistream_mla:
raise RuntimeError(
"enable_multistream_mla is valid only when Torchair graph mode is enabled"
)
if self.enable_multistream_moe:
raise RuntimeError(
"enable_multistream_moe is valid only when Torchair graph mode is enabled"
)
if self.enable_kv_nz:
raise RuntimeError(
"enable_kv_nz is valid only when Torchair graph mode is enabled"
)
class AscendSchedulerConfig:

View File

@@ -313,7 +313,8 @@ class CustomDeepseekV2MoE(nn.Module):
ascend_config = get_ascend_config()
self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
self.enable_multistream_moe = \
ascend_config.torchair_graph_config.enable_multistream_moe
ascend_config.torchair_graph_config.enable_multistream_moe and \
self.torchair_graph_enabled
self.gate = ReplicatedLinear(config.hidden_size,
config.n_routed_experts,

View File

@@ -1232,7 +1232,8 @@ class AscendFusedMoE(FusedMoE):
self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
self.enable_multistream_moe = \
ascend_config.torchair_graph_config.enable_multistream_moe
ascend_config.torchair_graph_config.enable_multistream_moe and \
self.torchair_graph_enabled
if self.scoring_func != "softmax" and not self.use_grouped_topk:
raise ValueError("Only softmax scoring function is supported for "