From 7efa4e92fead101214c00111ee67cc15b5f33f7d Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Mon, 7 Jul 2025 10:14:40 +0800 Subject: [PATCH] [CI] Fix oom in chunk prefill (#1622) ### What this PR does / why we need it? Add the resource cleanup logic to fix the OOM issue when testing `tests/e2e/singlecard/core/ascend_scheduler`. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with existing test. --------- Signed-off-by: MengqingCao --- .../ascend_scheduler/test_ascend_scheduler_e2e.py | 12 +++++++++--- .../core/ascend_scheduler/test_chunk_prefill.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py b/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py index 668dafc..17116ab 100644 --- a/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py +++ b/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py @@ -1,8 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import gc import os import pytest +import torch from vllm import LLM if os.getenv("VLLM_USE_V1", "0") != "1": @@ -13,8 +15,8 @@ PROMPT = "Hello my name is Robert and I" @pytest.fixture(scope="module") -def model() -> LLM: - return LLM( +def model(): + llm = LLM( MODEL, enforce_eager=True, enable_prefix_caching=True, @@ -23,6 +25,10 @@ additional_config={"ascend_scheduler_config": { "enabled": True, }}) + yield llm + del llm + torch.npu.empty_cache() + gc.collect() def test_concurrent_partial_prefill(model): @@ -37,4 +43,4 @@ def test_prefix_cache_stats_is_recorded(model): input_tokens = {"prompt_token_ids": [101] * 129} _ = model.generate([input_tokens]) outputs = model.generate([input_tokens]) - assert outputs[0].num_cached_tokens == 128 \ No newline at end of file + assert outputs[0].num_cached_tokens == 128 diff --git
a/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py b/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py index f0c907f..0b55796 100644 --- a/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +++ b/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py @@ -17,7 +17,6 @@ MODELS = [ ] -@pytest.mark.skipif(True, reason="oom in 910B4, fix me please") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_tokens", [4]) # cannot align results when max_tokens > 4