[E2E] Optimize e2e test. (#5091)

### What this PR does / why we need it?

[E2E] Optimize e2e test.

- Remove the `test_basic_camem` test case.
- Change Qwen2.5-0.5B-Instruct-W8A8 to Qwen3-0.6B-W8A8.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: menogrey <1299267905@qq.com>
2025-12-24 10:41:55 +08:00
parent bd4fb871c6
commit 74a1de50a9
1 changed file with 0 additions and 39 deletions
--- a/tests/e2e/singlecard/test_camem.py
+++ b/tests/e2e/singlecard/test_camem.py
@@ -17,7 +17,6 @@
 # limitations under the License.
 #

-import gc
 import os
 from unittest.mock import patch

@@ -27,44 +26,6 @@ from vllm.utils.mem_constants import GiB_bytes

 from tests.e2e.conftest import VllmRunner
 from tests.e2e.utils import fork_new_process_for_each_test
-from vllm_ascend.device_allocator.camem import CaMemAllocator
-
-
-@fork_new_process_for_each_test
-def test_basic_camem():
-    # some tensors from default memory pool
-    shape = (1024, 1024)
-    x = torch.empty(shape, device='npu:0')
-    x.zero_()
-
-    # some tensors from custom memory pool
-    allocator = CaMemAllocator.get_instance()
-    with allocator.use_memory_pool():
-        # custom memory pool
-        y = torch.empty(shape, device='npu:0')
-        y.zero_()
-        y += 1
-        z = torch.empty(shape, device='npu:0')
-        z.zero_()
-        z += 2
-
-    # they can be used together
-    output = x + y + z
-    assert torch.allclose(output, torch.ones_like(output) * 3)
-
-    free_bytes = torch.npu.mem_get_info()[0]
-    allocator.sleep()
-    free_bytes_after_sleep = torch.npu.mem_get_info()[0]
-    assert free_bytes_after_sleep > free_bytes
-    allocator.wake_up()
-
-    # they can be used together
-    output = x + y + z
-    assert torch.allclose(output, torch.ones_like(output) * 3)
-
-    gc.collect()
-    torch.npu.empty_cache()
-    torch.npu.reset_peak_memory_stats()


@fork_new_process_for_each_test