[Main2Main] Upgrade vLLM to 0226 (#6813)

### What this PR does / why we need it?

Breaking upstream vLLM changes:

1. https://github.com/vllm-project/vllm/pull/33452
2. https://github.com/vllm-project/vllm/pull/33451
3. https://github.com/vllm-project/vllm/pull/32567
4. https://github.com/vllm-project/vllm/pull/32344

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 83b47f67b1

---------

Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: gcanlin <canlinguosdu@gmail.com>
Co-authored-by: MrZ20 <2609716663@qq.com>
2026-02-27 16:05:21 +08:00
parent 80316c5824
commit e4458b2d2b
40 changed files with 117 additions and 184 deletions
--- a/vllm_ascend/ascend_forward_context.py
+++ b/vllm_ascend/ascend_forward_context.py
@@ -19,7 +19,6 @@ from vllm_ascend.utils import (
    is_drafter_moe_model,
    is_moe_model,
    speculative_enable_dispatch_gmm_combine_decode,
-    vllm_version_is,
 )


@@ -152,10 +151,6 @@ def set_ascend_forward_context(
                mc2_mask[:num_actual_tokens] = True
                mc2_mask[num_actual_tokens:] = False
                forward_context.mc2_mask = mc2_mask
-
-        if is_draft_model and vllm_version_is("0.15.0"):
-            forward_context.remaining_moe_layers = None
-
        try:
            yield
        finally: