[CI] Follow vLLM FusedMoEParallelConfig interface change and clean up unused config (#1625)
This commit
78fe77534b
from vllm reverted the change for FusedMoEParallelConfig
This PR does the same to fix the CI error
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -5,8 +5,8 @@ from unittest import mock
|
||||
from transformers import PretrainedConfig
|
||||
from vllm.config import ModelConfig, VllmConfig
|
||||
|
||||
from vllm_ascend.ascend_config import (check_ascend_config,
|
||||
check_torchair_supported,
|
||||
from vllm_ascend.ascend_config import (_check_torchair_supported,
|
||||
check_ascend_config,
|
||||
clear_ascend_config, get_ascend_config,
|
||||
init_ascend_config)
|
||||
|
||||
@@ -248,5 +248,5 @@ class TestAscendConfig(unittest.TestCase):
|
||||
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
|
||||
('qwen', False), ('llama', False)]
|
||||
for model_type, expected_output in test_cases:
|
||||
self.assertEqual(check_torchair_supported(model_type),
|
||||
self.assertEqual(_check_torchair_supported(model_type),
|
||||
expected_output)
|
||||
|
||||
@@ -21,7 +21,7 @@ from vllm.logger import logger
|
||||
TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
|
||||
|
||||
|
||||
def check_torchair_supported(model_type: str):
|
||||
def _check_torchair_supported(model_type: str):
|
||||
for supported_model in TORCHAIR_MODEL_LIST:
|
||||
if supported_model in model_type.lower():
|
||||
return True
|
||||
@@ -147,10 +147,10 @@ def check_ascend_config(vllm_config, enforce_eager):
|
||||
else:
|
||||
# torchair_graph case
|
||||
if ascend_config.torchair_graph_config.enabled:
|
||||
# torchair_graph is supported for deepseek model only currently.
|
||||
# torchair_graph is supported for deepseek/pangu model only.
|
||||
if vllm_config.model_config:
|
||||
model_type = vllm_config.model_config.hf_config.model_type
|
||||
if not check_torchair_supported(model_type):
|
||||
if not _check_torchair_supported(model_type):
|
||||
raise NotImplementedError(
|
||||
"Torchair graph mode only works with following model types:"
|
||||
f"{TORCHAIR_MODEL_LIST}.")
|
||||
|
||||
@@ -27,7 +27,6 @@ from vllm.attention.backends.utils import PAD_SLOT_ID, CommonAttentionState
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.worker.gpu_input_batch import InputBatch
|
||||
|
||||
from vllm_ascend.ascend_config import get_ascend_config
|
||||
from vllm_ascend.attention.attention_v1 import AscendAttentionState
|
||||
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
|
||||
nd_to_nz_2d)
|
||||
@@ -160,8 +159,6 @@ class AscendAttentionTorchairMetadataBuilder:
|
||||
|
||||
def __init__(self, runner):
|
||||
self.runner = runner
|
||||
self.torchair_graph_enabled = get_ascend_config(
|
||||
).torchair_graph_config.enabled
|
||||
|
||||
def reorder_batch(self, input_batch: "InputBatch",
|
||||
scheduler_output: "SchedulerOutput") -> bool:
|
||||
|
||||
@@ -26,8 +26,7 @@ from vllm.config import get_current_vllm_config
|
||||
from vllm.distributed import (GroupCoordinator, get_tensor_model_parallel_rank,
|
||||
get_tensor_model_parallel_world_size,
|
||||
tensor_model_parallel_all_reduce)
|
||||
from vllm.distributed.parallel_state import (get_dp_group, get_tp_group,
|
||||
get_world_group)
|
||||
from vllm.distributed.parallel_state import get_dp_group, get_tp_group
|
||||
from vllm.forward_context import get_forward_context
|
||||
from vllm.model_executor.layers.fused_moe.layer import (
|
||||
FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
|
||||
@@ -1119,21 +1118,12 @@ class AscendFusedMoE(FusedMoE):
|
||||
|
||||
vllm_config = get_current_vllm_config()
|
||||
|
||||
if vllm_version_is("0.9.1"):
|
||||
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
||||
tp_size_=(tp_size if tp_size is not None else
|
||||
get_tensor_model_parallel_world_size()),
|
||||
dp_size_=(dp_size if dp_size is not None else
|
||||
get_dp_group().world_size),
|
||||
vllm_parallel_config=vllm_config.parallel_config)
|
||||
else:
|
||||
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
||||
tp_size_=(tp_size if tp_size is not None else
|
||||
get_tensor_model_parallel_world_size()),
|
||||
dp_size_=(dp_size if dp_size is not None else
|
||||
get_dp_group().world_size),
|
||||
world_size_=get_world_group().world_size,
|
||||
vllm_parallel_config=vllm_config.parallel_config)
|
||||
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
||||
tp_size_=(tp_size if tp_size is not None else
|
||||
get_tensor_model_parallel_world_size()),
|
||||
dp_size_=(dp_size
|
||||
if dp_size is not None else get_dp_group().world_size),
|
||||
vllm_parallel_config=vllm_config.parallel_config)
|
||||
|
||||
self.top_k = top_k
|
||||
self.num_experts = num_experts
|
||||
|
||||
Reference in New Issue
Block a user