[Misc] upgrade to vllm main (#6646)

### What this PR does / why we need it?
This PR upgrades the core vLLM dependency to a newer version from the
main branch (`13397841ab469cecf1ed425c3f52a9ffc38139b5`). This is
necessary to keep our project up-to-date with the latest features and
fixes from upstream vLLM.

1.
ac32e66cf9
The inductor pass file was moved upstream (`vllm.compilation.vllm_inductor_pass` → `vllm.compilation.passes.vllm_inductor_pass`), so the import is now version-gated.

- vLLM version: v0.15.0
- vLLM main:
d7e17aaacd

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: wxsIcey <1790571317@qq.com>
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
Co-authored-by: wxsIcey <1790571317@qq.com>
Commit: 2a826b5fad (parent 1c7d1163f5)
Author: wangxiyuan, committed via GitHub, 2026-02-10 14:08:59 +08:00
19 changed files with 296 additions and 146 deletions


@@ -18,7 +18,6 @@
 import torch
 import torch._inductor.pattern_matcher as pm
 from torch._inductor.pattern_matcher import PatternMatcherPass, PatternPrettyPrinter
-from vllm.compilation.vllm_inductor_pass import VllmInductorPass
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.config.compilation import Range
 from vllm.logger import logger
@@ -27,7 +26,9 @@ from vllm_ascend.utils import vllm_version_is
 if vllm_version_is("v0.15.0"):
     from vllm.attention.layer import Attention  # type: ignore
+    from vllm.compilation.vllm_inductor_pass import VllmInductorPass  # type: ignore
 else:
+    from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass
     from vllm.model_executor.layers.attention import Attention
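
The diff above gates the import on `vllm_version_is()`. The same "module moved upstream" problem can also be absorbed with a try-in-order import fallback; below is a minimal, self-contained sketch of that alternative. `import_first` is a hypothetical helper (not vLLM or vllm-ascend API), and stdlib names stand in for the moved vLLM modules so the example runs anywhere:

```python
import importlib


def import_first(*paths):
    """Return the first importable attribute among candidate dotted paths.

    Hypothetical helper: vllm-ascend actually branches on vllm_version_is(),
    but trying paths in order expresses the same compatibility idea without
    pinning to an exact version string.
    """
    last_err = None
    for path in paths:
        module_name, _, attr = path.rpartition(".")
        try:
            return getattr(importlib.import_module(module_name), attr)
        except (ImportError, AttributeError) as err:
            last_err = err
    raise ImportError(f"none of {paths} is importable") from last_err


# stdlib stand-ins for the moved vLLM modules: the first path has no such
# attribute (simulating the pre-move location disappearing), so the helper
# falls back to the surviving path.
OrderedDict = import_first(
    "collections.abc.OrderedDict",  # "old" location, gone
    "collections.OrderedDict",      # "new" location, present
)
```

A version check fails loudly when an unexpected vLLM is installed, while a fallback keeps working across the move; the project uses the explicit check, which is easier to clean up once support for v0.15.0 is dropped.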