Warn users when release_memory_occupation is called without memory saver enabled (#4566)

2025-03-26 15:18:14 +08:00
parent 34e07a65f1
commit 26f07294f1
10 changed files with 50 additions and 12 deletions
--- a/python/sglang/test/attention/test_flashattn_backend.py
+++ b/python/sglang/test/attention/test_flashattn_backend.py
@@ -6,6 +6,7 @@ from sglang.srt.layers.attention.flashattention_backend import FlashAttentionBac
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
+from sglang.test.test_utils import CustomTestCase


 class MockModelRunner:
@@ -39,7 +40,7 @@ class MockReqToTokenPool:


@unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA")
-class TestFlashAttentionBackend(unittest.TestCase):
+class TestFlashAttentionBackend(CustomTestCase):
    def setUp(self):
        """Set up test fixtures before each test method."""
        self.model_runner = MockModelRunner()