From 15b8aff5826e20df4c526e0e8184a7b25d036559 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Mon, 29 Sep 2025 09:13:08 +0800 Subject: [PATCH] [CI] Add max_split_size_mb for e2e test to avoid oom (#3252) ### What this PR does / why we need it? We added a patch for the model weight loader to avoid using vLLM weight loader v2, since v2 leads to an unknown issue for torchair. However, this patch introduces an unknown memory usage problem. As a quick fix, this PR sets `max_split_size_mb` to a larger value to avoid the weight-load OOM issue. A further solution is to remove the patch and adopt weight loader v2 from vLLM. Closes: https://github.com/vllm-project/vllm-ascend/issues/3251 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.0 Signed-off-by: wangxiyuan --- tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py index 97ecbf1..3b47222 100644 --- a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py @@ -11,6 +11,7 @@ from vllm import LLM, SamplingParams from tests.e2e.conftest import VllmRunner os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" +os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256" @pytest.fixture