Warn users when release_memory_occupation is called without memory saver enabled (#4566)
This commit is contained in:
@@ -6,6 +6,7 @@ from sglang.srt.layers.attention.flashattention_backend import FlashAttentionBac
|
||||
from sglang.srt.layers.radix_attention import RadixAttention
|
||||
from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
|
||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
|
||||
from sglang.test.test_utils import CustomTestCase
|
||||
|
||||
|
||||
class MockModelRunner:
|
||||
@@ -39,7 +40,7 @@ class MockReqToTokenPool:
|
||||
|
||||
|
||||
@unittest.skipIf(not torch.cuda.is_available(), "Test requires CUDA")
|
||||
class TestFlashAttentionBackend(unittest.TestCase):
|
||||
class TestFlashAttentionBackend(CustomTestCase):
|
||||
def setUp(self):
|
||||
"""Set up test fixtures before each test method."""
|
||||
self.model_runner = MockModelRunner()
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import copy
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
@@ -922,6 +923,10 @@ def run_mulit_request_test(
|
||||
|
||||
|
||||
def write_github_step_summary(content):
    """Append *content* to the GitHub Actions step-summary file.

    The destination path is read from the ``GITHUB_STEP_SUMMARY``
    environment variable. When the variable is unset or empty, a warning is
    logged and nothing is written.

    Args:
        content: Text to append to the summary file.

    Returns:
        None.
    """
    # Read the variable once so the check and the open() use the same value.
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        logging.warning("GITHUB_STEP_SUMMARY environment variable not set")
        return

    # Write UTF-8 explicitly: relying on the platform default encoding can
    # fail or garble non-ASCII content on runners with a non-UTF-8 locale.
    with open(summary_path, "a", encoding="utf-8") as f:
        f.write(content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user