[CI] Fix CI Break: upstream adds routed_scaling_factor in forward_oot interface (#2675)

### What this PR does / why we need it?
Fix CI break: upstream added `routed_scaling_factor` to the `forward_oot`
interface; vllm-ascend needs to adapt its matching signatures accordingly.

### Does this PR introduce _any_ user-facing change?
N/A (no user-facing change)

### How was this patch tested?
E2E tests and unit tests

- vLLM version: v0.10.1.1
- vLLM main:
3e330fcb21

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
leo-pony
2025-09-01 19:02:50 +08:00
committed by GitHub
parent ea53f9076e
commit 0df059f41a
2 changed files with 73 additions and 3 deletions

View File

@@ -40,6 +40,7 @@ def select_experts(hidden_states: torch.Tensor,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
routed_scaling_factor=1.0,
e_score_correction_bias: Optional[torch.Tensor] = None,
indices_type: Optional[torch.dtype] = None,
is_unquantized: bool = False,
@@ -78,6 +79,7 @@ def select_experts(hidden_states: torch.Tensor,
num_expert_group=num_expert_group,
custom_routing_function=custom_routing_function,
scoring_func=scoring_func,
routed_scaling_factor=routed_scaling_factor,
global_num_experts=global_num_experts,
is_unquantized=is_unquantized)
@@ -180,6 +182,7 @@ def _select_experts_with_fusion_ops(
num_expert_group: Optional[int],
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
routed_scaling_factor=1.0,
global_num_experts: int = -1,
is_unquantized: bool = False):