upgrade vLLM to 0.12.0 tag (#4647)

Upgrade vLLM to v0.12.0 tag

- vLLM version: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
- vLLM main: 86e178f7c4

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-03 23:43:05 +08:00
committed by GitHub
parent 26e8e58cea
commit 3f4c0ea0a0
22 changed files with 97 additions and 47 deletions

View File

@@ -159,7 +159,8 @@ class NPUPlatform(Platform):
compilation_config.splitting_ops = []
compilation_config.cudagraph_num_of_warmups = 1
compilation_config.pass_config.enable_fusion = False
compilation_config.pass_config.fuse_norm_quant = False
compilation_config.pass_config.fuse_act_quant = False
if compilation_config.mode not in [
CompilationMode.NONE, CompilationMode.VLLM_COMPILE
@@ -194,7 +195,7 @@ class NPUPlatform(Platform):
# to ascend ops && hardwares. We update these sizes here to improve
# default performance.
update_default_aclgraph_sizes(vllm_config)
# TODO delete graph size update here when compilation_config.pass_config.enable_sequence_parallelism
# TODO delete graph size update here when compilation_config.pass_config.enable_sp
# is supported by vllm-ascend.
if vllm_config.parallel_config.tensor_parallel_size > 1 and not vllm_config.model_config.enforce_eager and \
enable_sp(vllm_config):