[CI] Follow vLLM FusedMoEParallelConfig interface change and clean up unused config (#1625)

This commit
78fe77534b
from vLLM reverted the FusedMoEParallelConfig interface change

This PR does the same to fix the CI error.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-04 17:54:33 +08:00
committed by GitHub
parent 4e910186de
commit 343955c7ac
4 changed files with 13 additions and 26 deletions

View File

@@ -5,8 +5,8 @@ from unittest import mock
from transformers import PretrainedConfig
from vllm.config import ModelConfig, VllmConfig
from vllm_ascend.ascend_config import (check_ascend_config,
check_torchair_supported,
from vllm_ascend.ascend_config import (_check_torchair_supported,
check_ascend_config,
clear_ascend_config, get_ascend_config,
init_ascend_config)
@@ -248,5 +248,5 @@ class TestAscendConfig(unittest.TestCase):
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
('qwen', False), ('llama', False)]
for model_type, expected_output in test_cases:
self.assertEqual(check_torchair_supported(model_type),
self.assertEqual(_check_torchair_supported(model_type),
expected_output)

View File

@@ -21,7 +21,7 @@ from vllm.logger import logger
TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
def check_torchair_supported(model_type: str):
def _check_torchair_supported(model_type: str):
for supported_model in TORCHAIR_MODEL_LIST:
if supported_model in model_type.lower():
return True
@@ -147,10 +147,10 @@ def check_ascend_config(vllm_config, enforce_eager):
else:
# torchair_graph case
if ascend_config.torchair_graph_config.enabled:
# torchair_graph is supported for deepseek model only currently.
# torchair_graph is supported for deepseek/pangu model only.
if vllm_config.model_config:
model_type = vllm_config.model_config.hf_config.model_type
if not check_torchair_supported(model_type):
if not _check_torchair_supported(model_type):
raise NotImplementedError(
"Torchair graph mode only works with following model types:"
f"{TORCHAIR_MODEL_LIST}.")

View File

@@ -27,7 +27,6 @@ from vllm.attention.backends.utils import PAD_SLOT_ID, CommonAttentionState
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.worker.gpu_input_batch import InputBatch
from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.attention.attention_v1 import AscendAttentionState
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
nd_to_nz_2d)
@@ -160,8 +159,6 @@ class AscendAttentionTorchairMetadataBuilder:
def __init__(self, runner):
self.runner = runner
self.torchair_graph_enabled = get_ascend_config(
).torchair_graph_config.enabled
def reorder_batch(self, input_batch: "InputBatch",
scheduler_output: "SchedulerOutput") -> bool:

View File

@@ -26,8 +26,7 @@ from vllm.config import get_current_vllm_config
from vllm.distributed import (GroupCoordinator, get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
tensor_model_parallel_all_reduce)
from vllm.distributed.parallel_state import (get_dp_group, get_tp_group,
get_world_group)
from vllm.distributed.parallel_state import get_dp_group, get_tp_group
from vllm.forward_context import get_forward_context
from vllm.model_executor.layers.fused_moe.layer import (
FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
@@ -1119,21 +1118,12 @@ class AscendFusedMoE(FusedMoE):
vllm_config = get_current_vllm_config()
if vllm_version_is("0.9.1"):
self.moe_parallel_config = FusedMoEParallelConfig.make(
tp_size_=(tp_size if tp_size is not None else
get_tensor_model_parallel_world_size()),
dp_size_=(dp_size if dp_size is not None else
get_dp_group().world_size),
vllm_parallel_config=vllm_config.parallel_config)
else:
self.moe_parallel_config = FusedMoEParallelConfig.make(
tp_size_=(tp_size if tp_size is not None else
get_tensor_model_parallel_world_size()),
dp_size_=(dp_size if dp_size is not None else
get_dp_group().world_size),
world_size_=get_world_group().world_size,
vllm_parallel_config=vllm_config.parallel_config)
self.moe_parallel_config = FusedMoEParallelConfig.make(
tp_size_=(tp_size if tp_size is not None else
get_tensor_model_parallel_world_size()),
dp_size_=(dp_size
if dp_size is not None else get_dp_group().world_size),
vllm_parallel_config=vllm_config.parallel_config)
self.top_k = top_k
self.num_experts = num_experts