[CI] Fix CI Break: upstream adds routed_scaling_factor in forward_oot interface (#2675)

### What this PR does / why we need it?
Fix CI break: upstream added `routed_scaling_factor` to the `forward_oot`
interface; vllm-ascend needs to adapt its matching signatures accordingly.

### Does this PR introduce _any_ user-facing change?
N/A (no user-facing change)

### How was this patch tested?
E2E tests and unit tests

- vLLM version: v0.10.1.1
- vLLM main:
3e330fcb21

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
leo-pony
2025-09-01 19:02:50 +08:00
committed by GitHub
parent ea53f9076e
commit 0df059f41a
2 changed files with 73 additions and 3 deletions

View File

@@ -40,6 +40,7 @@ def select_experts(hidden_states: torch.Tensor,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
routed_scaling_factor=1.0,
e_score_correction_bias: Optional[torch.Tensor] = None,
indices_type: Optional[torch.dtype] = None,
is_unquantized: bool = False,
@@ -78,6 +79,7 @@ def select_experts(hidden_states: torch.Tensor,
num_expert_group=num_expert_group,
custom_routing_function=custom_routing_function,
scoring_func=scoring_func,
routed_scaling_factor=routed_scaling_factor,
global_num_experts=global_num_experts,
is_unquantized=is_unquantized)
@@ -180,6 +182,7 @@ def _select_experts_with_fusion_ops(
num_expert_group: Optional[int],
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
routed_scaling_factor=1.0,
global_num_experts: int = -1,
is_unquantized: bool = False):