Warn users when release_memory_occupation is called without memory saver enabled (#4566)

This commit is contained in:
fzyzcjy
2025-03-26 15:18:14 +08:00
committed by GitHub
parent 34e07a65f1
commit 26f07294f1
10 changed files with 50 additions and 12 deletions

View File

@@ -6,6 +6,7 @@ from sglang.srt.layers.attention.flashattention_backend import FlashAttentionBac
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
from sglang.test.test_utils import CustomTestCase
class MockModelRunner:
@@ -39,7 +40,7 @@ class MockReqToTokenPool:
@unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA")
class TestFlashAttentionBackend(unittest.TestCase):
class TestFlashAttentionBackend(CustomTestCase):
def setUp(self):
"""Set up test fixtures before each test method."""
self.model_runner = MockModelRunner()

View File

@@ -3,6 +3,7 @@
import argparse
import asyncio
import copy
import logging
import os
import random
import subprocess
@@ -922,6 +923,10 @@ def run_mulit_request_test(
def write_github_step_summary(content):
    """Append *content* to the file named by ``GITHUB_STEP_SUMMARY``.

    The target path is taken from the ``GITHUB_STEP_SUMMARY`` environment
    variable. When that variable is missing or empty, a warning is logged
    and nothing is written.

    Args:
        content: Text to append to the summary file.

    Returns:
        None.
    """
    # Read the variable once so the emptiness check and the open() call are
    # guaranteed to see the same value.
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        logging.warning("GITHUB_STEP_SUMMARY environment variable not set")
        return

    # Append mode keeps whatever earlier calls already wrote to the file.
    # An explicit UTF-8 encoding keeps non-ASCII content independent of the
    # platform's locale default.
    with open(summary_path, "a", encoding="utf-8") as f:
        f.write(content)