upgrade main to 0212 (#6712)

### What this PR does / why we need it?

Fixes `transformers_utils/processors/__init__` import error, due to https://github.com/vllm-project/vllm/pull/33247

Fixes Fused MoE break introduced by `MoERunner abstraction`, due to https://github.com/vllm-project/vllm/pull/32344

> delete AscendMoERunner when https://github.com/vllm-project/vllm/pull/35178 is merged

Fixes `Make Qwen3VL compatible with Transformers v5`, due to https://github.com/vllm-project/vllm/pull/34262

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 9562912cea

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
2026-02-25 09:17:29 +08:00
parent 0331f16a50
commit ee59429015
11 changed files with 167 additions and 32 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -525,6 +525,13 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
            "increase the number of supported shapes, set HCCL_OP_EXPANSION_MODE=AIV."
        )

+    from vllm_ascend.utils import vllm_version_is
+
+    if vllm_version_is("0.15.0"):
+        arch_name = vllm_config.model_config.architectures[0]
+    else:
+        arch_name = vllm_config.model_config.architecture
+
    # If original sizes exceed maximum, sample a representative subset
    if max_num_batch_sizes < len(original_sizes):
        # Sample uniformly from original sizes
@@ -536,10 +543,9 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:

        sampled_sizes = [original_sizes[i] for i in indices]
        update_cudagraph_capture_sizes(vllm_config, sampled_sizes)
-
        logger.info(
            "Adjusted ACL graph batch sizes for %s model (layers: %d): %d → %d sizes",
-            vllm_config.model_config.architectures[0],
+            arch_name,
            num_hidden_layers,
            len(original_sizes),
            len(
@@ -551,7 +557,7 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
        compilation_config.cudagraph_capture_sizes = original_sizes
        logger.info(
            "No adjustment needed for ACL graph batch sizes: %s model (layers: %d) with %d sizes",
-            vllm_config.model_config.architectures[0],
+            arch_name,
            num_hidden_layers,
            len(original_sizes),
        )