From 74a1de50a945af33a3acd878ab7e29f38e00b0c1 Mon Sep 17 00:00:00 2001
From: zhangyiming <34808445+menogrey@users.noreply.github.com>
Date: Wed, 24 Dec 2025 10:41:55 +0800
Subject: [PATCH] [E2E] Optimize e2e test. (#5091)

### What this PR does / why we need it?
[E2E] Optimize e2e test.
- Remove the `test_basic_camem` test case.
- Replace Qwen2.5-0.5B-Instruct-W8A8 with Qwen3-0.6B-W8A8.

- vLLM version: v0.12.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: menogrey <1299267905@qq.com>
---
 tests/e2e/singlecard/test_camem.py | 39 ------------------------------
 1 file changed, 39 deletions(-)

diff --git a/tests/e2e/singlecard/test_camem.py b/tests/e2e/singlecard/test_camem.py
index 33a99a42..f3fd6aa4 100644
--- a/tests/e2e/singlecard/test_camem.py
+++ b/tests/e2e/singlecard/test_camem.py
@@ -17,7 +17,6 @@
 # limitations under the License.
 #
 
-import gc
 import os
 from unittest.mock import patch
 
@@ -27,44 +26,6 @@ from vllm.utils.mem_constants import GiB_bytes
 
 from tests.e2e.conftest import VllmRunner
 from tests.e2e.utils import fork_new_process_for_each_test
-from vllm_ascend.device_allocator.camem import CaMemAllocator
-
-
-@fork_new_process_for_each_test
-def test_basic_camem():
-    # some tensors from default memory pool
-    shape = (1024, 1024)
-    x = torch.empty(shape, device='npu:0')
-    x.zero_()
-
-    # some tensors from custom memory pool
-    allocator = CaMemAllocator.get_instance()
-    with allocator.use_memory_pool():
-        # custom memory pool
-        y = torch.empty(shape, device='npu:0')
-        y.zero_()
-        y += 1
-        z = torch.empty(shape, device='npu:0')
-        z.zero_()
-        z += 2
-
-    # they can be used together
-    output = x + y + z
-    assert torch.allclose(output, torch.ones_like(output) * 3)
-
-    free_bytes = torch.npu.mem_get_info()[0]
-    allocator.sleep()
-    free_bytes_after_sleep = torch.npu.mem_get_info()[0]
-    assert free_bytes_after_sleep > free_bytes
-    allocator.wake_up()
-
-    # they can be used together
-    output = x + y + z
-    assert torch.allclose(output, torch.ones_like(output) * 3)
-
-    gc.collect()
-    torch.npu.empty_cache()
-    torch.npu.reset_peak_memory_stats()
 
 
 @fork_new_process_for_each_test