[CI] Follow vLLM FusedMoEParallelConfig interface change and clean up unused config (#1625)
This commit
78fe77534b
from vllm reverted the change for FusedMoEParallelConfig
This PR does the same to fix the CI error.
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -5,8 +5,8 @@ from unittest import mock
|
|||||||
from transformers import PretrainedConfig
|
from transformers import PretrainedConfig
|
||||||
from vllm.config import ModelConfig, VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
|
|
||||||
from vllm_ascend.ascend_config import (check_ascend_config,
|
from vllm_ascend.ascend_config import (_check_torchair_supported,
|
||||||
check_torchair_supported,
|
check_ascend_config,
|
||||||
clear_ascend_config, get_ascend_config,
|
clear_ascend_config, get_ascend_config,
|
||||||
init_ascend_config)
|
init_ascend_config)
|
||||||
|
|
||||||
@@ -248,5 +248,5 @@ class TestAscendConfig(unittest.TestCase):
|
|||||||
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
|
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
|
||||||
('qwen', False), ('llama', False)]
|
('qwen', False), ('llama', False)]
|
||||||
for model_type, expected_output in test_cases:
|
for model_type, expected_output in test_cases:
|
||||||
self.assertEqual(check_torchair_supported(model_type),
|
self.assertEqual(_check_torchair_supported(model_type),
|
||||||
expected_output)
|
expected_output)
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from vllm.logger import logger
|
|||||||
TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
|
TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
|
||||||
|
|
||||||
|
|
||||||
def check_torchair_supported(model_type: str):
|
def _check_torchair_supported(model_type: str):
|
||||||
for supported_model in TORCHAIR_MODEL_LIST:
|
for supported_model in TORCHAIR_MODEL_LIST:
|
||||||
if supported_model in model_type.lower():
|
if supported_model in model_type.lower():
|
||||||
return True
|
return True
|
||||||
@@ -147,10 +147,10 @@ def check_ascend_config(vllm_config, enforce_eager):
|
|||||||
else:
|
else:
|
||||||
# torchair_graph case
|
# torchair_graph case
|
||||||
if ascend_config.torchair_graph_config.enabled:
|
if ascend_config.torchair_graph_config.enabled:
|
||||||
# torchair_graph is supported for deepseek model only currently.
|
# torchair_graph is supported for deepseek/pangu model only.
|
||||||
if vllm_config.model_config:
|
if vllm_config.model_config:
|
||||||
model_type = vllm_config.model_config.hf_config.model_type
|
model_type = vllm_config.model_config.hf_config.model_type
|
||||||
if not check_torchair_supported(model_type):
|
if not _check_torchair_supported(model_type):
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Torchair graph mode only works with following model types:"
|
"Torchair graph mode only works with following model types:"
|
||||||
f"{TORCHAIR_MODEL_LIST}.")
|
f"{TORCHAIR_MODEL_LIST}.")
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ from vllm.attention.backends.utils import PAD_SLOT_ID, CommonAttentionState
|
|||||||
from vllm.v1.core.sched.output import SchedulerOutput
|
from vllm.v1.core.sched.output import SchedulerOutput
|
||||||
from vllm.v1.worker.gpu_input_batch import InputBatch
|
from vllm.v1.worker.gpu_input_batch import InputBatch
|
||||||
|
|
||||||
from vllm_ascend.ascend_config import get_ascend_config
|
|
||||||
from vllm_ascend.attention.attention_v1 import AscendAttentionState
|
from vllm_ascend.attention.attention_v1 import AscendAttentionState
|
||||||
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
|
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
|
||||||
nd_to_nz_2d)
|
nd_to_nz_2d)
|
||||||
@@ -160,8 +159,6 @@ class AscendAttentionTorchairMetadataBuilder:
|
|||||||
|
|
||||||
def __init__(self, runner):
|
def __init__(self, runner):
|
||||||
self.runner = runner
|
self.runner = runner
|
||||||
self.torchair_graph_enabled = get_ascend_config(
|
|
||||||
).torchair_graph_config.enabled
|
|
||||||
|
|
||||||
def reorder_batch(self, input_batch: "InputBatch",
|
def reorder_batch(self, input_batch: "InputBatch",
|
||||||
scheduler_output: "SchedulerOutput") -> bool:
|
scheduler_output: "SchedulerOutput") -> bool:
|
||||||
|
|||||||
@@ -26,8 +26,7 @@ from vllm.config import get_current_vllm_config
|
|||||||
from vllm.distributed import (GroupCoordinator, get_tensor_model_parallel_rank,
|
from vllm.distributed import (GroupCoordinator, get_tensor_model_parallel_rank,
|
||||||
get_tensor_model_parallel_world_size,
|
get_tensor_model_parallel_world_size,
|
||||||
tensor_model_parallel_all_reduce)
|
tensor_model_parallel_all_reduce)
|
||||||
from vllm.distributed.parallel_state import (get_dp_group, get_tp_group,
|
from vllm.distributed.parallel_state import get_dp_group, get_tp_group
|
||||||
get_world_group)
|
|
||||||
from vllm.forward_context import get_forward_context
|
from vllm.forward_context import get_forward_context
|
||||||
from vllm.model_executor.layers.fused_moe.layer import (
|
from vllm.model_executor.layers.fused_moe.layer import (
|
||||||
FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
|
FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
|
||||||
@@ -1119,21 +1118,12 @@ class AscendFusedMoE(FusedMoE):
|
|||||||
|
|
||||||
vllm_config = get_current_vllm_config()
|
vllm_config = get_current_vllm_config()
|
||||||
|
|
||||||
if vllm_version_is("0.9.1"):
|
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
||||||
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
tp_size_=(tp_size if tp_size is not None else
|
||||||
tp_size_=(tp_size if tp_size is not None else
|
get_tensor_model_parallel_world_size()),
|
||||||
get_tensor_model_parallel_world_size()),
|
dp_size_=(dp_size
|
||||||
dp_size_=(dp_size if dp_size is not None else
|
if dp_size is not None else get_dp_group().world_size),
|
||||||
get_dp_group().world_size),
|
vllm_parallel_config=vllm_config.parallel_config)
|
||||||
vllm_parallel_config=vllm_config.parallel_config)
|
|
||||||
else:
|
|
||||||
self.moe_parallel_config = FusedMoEParallelConfig.make(
|
|
||||||
tp_size_=(tp_size if tp_size is not None else
|
|
||||||
get_tensor_model_parallel_world_size()),
|
|
||||||
dp_size_=(dp_size if dp_size is not None else
|
|
||||||
get_dp_group().world_size),
|
|
||||||
world_size_=get_world_group().world_size,
|
|
||||||
vllm_parallel_config=vllm_config.parallel_config)
|
|
||||||
|
|
||||||
self.top_k = top_k
|
self.top_k = top_k
|
||||||
self.num_experts = num_experts
|
self.num_experts = num_experts
|
||||||
|
|||||||
Reference in New Issue
Block a user