[CI] Upgrade vLLM to 20250919 (6d8246aa) and fix some broken issue (#2907)
### What this PR does / why we need it? 1. This pr bump vllm commit to6d8246aaff2. fix upstream changes https://github.com/vllm-project/vllm/pull/24548 abort multi-modal kwargs, make vllm main and `v0.10.2` both adaptable 3. fix metadata_builder changes introduced by https://github.com/vllm-project/vllm/pull/23693 4. fix `structured_outputs_config` changes introduced by https://github.com/vllm-project/vllm/pull/22772 5. fix `moe_config` changes introduced by https://github.com/vllm-project/vllm/pull/22537 Co-authored-by: MengqingCao <cmq0113@163.com> Co-authored-by: Yikun Jiang <yikunkero@gmail.com> - vLLM version: v0.10.2 - vLLM main:c60e6137f0--------- Signed-off-by: wangli <wangli858794774@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -47,7 +47,8 @@ from vllm_ascend.ops.moe.moe_comm_method import (AllGatherCommImpl,
|
||||
from vllm_ascend.ops.sequence_parallel import MetadataForPadding
|
||||
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ,
|
||||
get_all_reduce_merge_state,
|
||||
get_rm_router_logits_state, is_310p)
|
||||
get_rm_router_logits_state, is_310p,
|
||||
vllm_version_is)
|
||||
|
||||
|
||||
class AscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod):
|
||||
@@ -278,16 +279,25 @@ class AscendFusedMoE(FusedMoE):
|
||||
if self.scoring_func != "softmax" and not self.use_grouped_topk:
|
||||
raise ValueError("Only softmax scoring function is supported for "
|
||||
"non-grouped topk.")
|
||||
moe = FusedMoEConfig.make(
|
||||
num_experts=self.global_num_experts,
|
||||
experts_per_token=top_k,
|
||||
hidden_dim=hidden_size,
|
||||
num_local_experts=self.local_num_experts,
|
||||
moe_parallel_config=self.moe_parallel_config,
|
||||
# TODO (bnell): this needs to be fixed for quantized types.
|
||||
in_dtype=params_dtype,
|
||||
quant_config=quant_config)
|
||||
|
||||
if vllm_version_is("0.10.2"):
|
||||
moe = FusedMoEConfig.make(
|
||||
num_experts=self.global_num_experts,
|
||||
experts_per_token=top_k,
|
||||
hidden_dim=hidden_size,
|
||||
num_local_experts=self.local_num_experts,
|
||||
moe_parallel_config=self.moe_parallel_config,
|
||||
# TODO (bnell): this needs to be fixed for quantized types.
|
||||
in_dtype=params_dtype,
|
||||
quant_config=quant_config)
|
||||
else:
|
||||
moe = FusedMoEConfig(
|
||||
num_experts=self.global_num_experts,
|
||||
experts_per_token=top_k,
|
||||
hidden_dim=hidden_size,
|
||||
num_local_experts=self.local_num_experts,
|
||||
moe_parallel_config=self.moe_parallel_config,
|
||||
in_dtype=params_dtype,
|
||||
)
|
||||
self.moe_config = moe
|
||||
|
||||
if quant_config is None:
|
||||
|
||||
Reference in New Issue
Block a user