[Misc] Add extra checking to torchair_graph_config. (#1939)

### What this PR does / why we need it?

Cherry-pick of #1675 to main.
This PR adds validation checks to `torchair_graph_config`: `graph_batch_sizes` must be a list, and graph-mode options such as `use_cached_graph`, `graph_batch_sizes_init`, `enable_multistream_mla`, `enable_multistream_moe`, and `enable_kv_nz` are rejected when Torchair graph mode is disabled, so misconfigurations fail fast at config-load time instead of surfacing later at runtime.
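
For illustration, a minimal sketch of the kind of configuration the new checks reject, driven through `init_ascend_config` the same way the unit test below does (the `vllm_ascend.ascend_config` import path is assumed here):

```python
from vllm.config import VllmConfig

from vllm_ascend.ascend_config import init_ascend_config

# use_cached_graph only makes sense under Torchair graph mode, so this
# combination now raises RuntimeError at config-load time.
vllm_config = VllmConfig()
vllm_config.additional_config = {
    "torchair_graph_config": {
        "enabled": False,
        "use_cached_graph": True,
    },
    "refresh": True,
}
init_ascend_config(vllm_config)  # raises RuntimeError
```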

Co-authored-by: whx-sjtu <2952154980@qq.com>

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

A new unit test, `test_ascend_config_load_error` in `TestAscendConfig`, covers each rejected configuration.

- vLLM version: v0.10.0
- vLLM main: 2836dd73f1

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
Commit 8cf97d8310 (parent 2284289880)
Author: 22dimensions
Date: 2025-08-01 09:24:11 +08:00
Committed by: GitHub
5 changed files with 100 additions and 6 deletions


@@ -54,17 +54,16 @@ Multi-node:
     --master-port=13345
 """
-import os
-from time import sleep
 import contextlib
 import gc
+import os
+from time import sleep
 import torch
 from vllm import LLM, SamplingParams
-from vllm.utils import get_open_port
 from vllm.distributed.parallel_state import (  # noqa E402
     destroy_distributed_environment, destroy_model_parallel)
+from vllm.utils import get_open_port
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


@@ -236,3 +236,71 @@ class TestAscendConfig(TestBase):
         for model_type, expected_output in test_cases:
             self.assertEqual(_check_torchair_supported(model_type),
                              expected_output)
+
+    @_clean_up_ascend_config
+    def test_ascend_config_load_error(self):
+        test_vllm_config = VllmConfig()
+        # graph_batch_sizes should be a list.
+        with self.assertRaises(TypeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "graph_batch_sizes": "fake_size",
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+
+        # use_cached_graph should not be enabled without torchair graph mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                    "use_cached_graph": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+
+        # graph_batch_sizes_init should not be enabled without torchair graph mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                    "graph_batch_sizes_init": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+
+        # enable_multistream_mla should not be enabled without torchair graph mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                    "enable_multistream_mla": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+
+        # enable_multistream_moe should not be enabled without torchair graph mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                    "enable_multistream_moe": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)
+
+        # enable_kv_nz should not be enabled without torchair graph mode
+        with self.assertRaises(RuntimeError):
+            test_vllm_config.additional_config = {
+                "torchair_graph_config": {
+                    "enabled": False,
+                    "enable_kv_nz": True,
+                },
+                "refresh": True
+            }
+            init_ascend_config(test_vllm_config)


@@ -76,6 +76,31 @@ class TorchairGraphConfig:
             raise ValueError(
                 "graph_batch_sizes_init is only valid when graph_batch_sizes is empty"
             )
+        if not self.enabled:
+            if self.use_cached_graph:
+                raise RuntimeError(
+                    "use_cached_graph is valid only when Torchair graph mode is enabled"
+                )
+            if self.graph_batch_sizes:
+                raise RuntimeError(
+                    "graph_batch_sizes is valid only when Torchair graph mode is enabled"
+                )
+            if self.graph_batch_sizes_init:
+                raise RuntimeError(
+                    "graph_batch_sizes_init is valid only when Torchair graph mode is enabled"
+                )
+            if self.enable_multistream_mla:
+                raise RuntimeError(
+                    "enable_multistream_mla is valid only when Torchair graph mode is enabled"
+                )
+            if self.enable_multistream_moe:
+                raise RuntimeError(
+                    "enable_multistream_moe is valid only when Torchair graph mode is enabled"
+                )
+            if self.enable_kv_nz:
+                raise RuntimeError(
+                    "enable_kv_nz is valid only when Torchair graph mode is enabled"
+                )
 
 
 class AscendSchedulerConfig:
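
As a rough usage sketch of these checks (assuming, as the hunk above suggests, that `TorchairGraphConfig` lives in `vllm_ascend.ascend_config` and takes the raw `torchair_graph_config` dict):

```python
from vllm_ascend.ascend_config import TorchairGraphConfig

# Fine: cached-graph reuse together with Torchair graph mode enabled.
TorchairGraphConfig({"enabled": True, "use_cached_graph": True})

# Rejected: use_cached_graph without Torchair graph mode now fails fast.
try:
    TorchairGraphConfig({"enabled": False, "use_cached_graph": True})
except RuntimeError as err:
    print(err)  # use_cached_graph is valid only when Torchair graph mode is enabled
```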


@@ -313,7 +313,8 @@ class CustomDeepseekV2MoE(nn.Module):
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
         self.enable_multistream_moe = \
-            ascend_config.torchair_graph_config.enable_multistream_moe
+            ascend_config.torchair_graph_config.enable_multistream_moe and \
+            self.torchair_graph_enabled
 
         self.gate = ReplicatedLinear(config.hidden_size,
                                      config.n_routed_experts,


@@ -1232,7 +1232,8 @@ class AscendFusedMoE(FusedMoE):
 
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
         self.enable_multistream_moe = \
-            ascend_config.torchair_graph_config.enable_multistream_moe
+            ascend_config.torchair_graph_config.enable_multistream_moe and \
+            self.torchair_graph_enabled
 
         if self.scoring_func != "softmax" and not self.use_grouped_topk:
             raise ValueError("Only softmax scoring function is supported for "