adapt to vllm-ascend v0.18.0

Signed-off-by: Jing Wang <jingwang96@qq.com>
2026-04-21 03:05:32 +00:00
parent e18643f8a4
commit 6c097beaa5
132 changed files with 28744 additions and 101 deletions
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -519,7 +519,11 @@ class NPUPlatform(Platform):
        # Find more details at https://docs.vllm.ai/projects/ascend/en/latest/faqs.html#how-to-handle-the-out-of-memory-issue
        # NOTE: We should not set this environment variable in RL (sleep mode) scenarios.
        # Find more details about how to configure this environment variable at https://www.hiascend.com/document/detail/zh/Pytorch/720/comref/Envvariables/Envir_012.html
-        if model_config and not model_config.enable_sleep_mode:
+        if (
+            model_config
+            and not model_config.enable_sleep_mode
+            and not envs_ascend.VLLM_ASCEND_ENABLE_VNPU
+        ):
            npu_alloc_configs = os.getenv("PYTORCH_NPU_ALLOC_CONF", "expandable_segments:True")
            # This environment variable may have more than one key-value pairs.
            # We should append ",expandable_segments:True" to the current configs.