upgrade main to 0212 (#6712)

### What this PR does / why we need it?

- Fixes the `transformers_utils/processors/__init__` import error caused by https://github.com/vllm-project/vllm/pull/33247
- Fixes the Fused MoE break introduced by the `MoERunner abstraction` change in https://github.com/vllm-project/vllm/pull/32344
  > Delete `AscendMoERunner` once https://github.com/vllm-project/vllm/pull/35178 is merged.
- Fixes the break introduced by `Make Qwen3VL compatible with Transformers v5` in https://github.com/vllm-project/vllm/pull/34262

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
2026-02-25 09:17:29 +08:00
parent 0331f16a50
commit ee59429015
11 changed files with 167 additions and 32 deletions
--- a/tests/ut/eplb/core/test_eplb_utils.py
+++ b/tests/ut/eplb/core/test_eplb_utils.py
@@ -25,22 +25,35 @@ class TestAscendConfig(unittest.TestCase):
        if vllm_version_is("0.15.0"):
            moe_parallel_config = FusedMoEParallelConfig(
                2, 0, 1, 2, 1, 1, 1, 1, True, "hccl", enable_eplb=True)
+            moe_config = FusedMoEConfig(
+                num_experts=8,
+                experts_per_token=8,
+                hidden_dim=8192,
+                intermediate_size_per_partition=5,
+                num_local_experts=8,
+                activation="silu",
+                device="npu",
+                routing_method=RoutingMethodType.Simulated,
+                moe_parallel_config=moe_parallel_config,
+                in_dtype=torch.float16,
+            )
        else:
            moe_parallel_config = FusedMoEParallelConfig(
-                2, 0, 1, 2, 1, 1, 1, 1, True, "hccl",
-                is_sequence_parallel=False, enable_eplb=True)
-        moe_config = FusedMoEConfig(
-            num_experts=8,
-            experts_per_token=8,
-            hidden_dim=8192,
-            intermediate_size_per_partition=5,
-            num_local_experts=8,
-            activation="silu",
-            device="npu",
-            routing_method=RoutingMethodType.Simulated,
-            moe_parallel_config=moe_parallel_config,
-            in_dtype=torch.float16,
-        )
+                2, 0, 1, 2, 1, 1, 1, 1, 1, True, "hccl",
+                enable_eplb=True)
+            moe_config = FusedMoEConfig(
+                num_experts=8,
+                experts_per_token=8,
+                hidden_dim=8192,
+                intermediate_size_per_partition=5,
+                num_local_experts=8,
+                num_logical_experts=8,
+                activation="silu",
+                device="npu",
+                routing_method=RoutingMethodType.Simulated,
+                moe_parallel_config=moe_parallel_config,
+                in_dtype=torch.float16,
+            )
        moe_config.supports_eplb = True
        self.vllm_config = vllm_config
        self.moe_config = moe_config