aiter attention-backend (default enabled on AMD/ROCm) (#6381)

This commit is contained in:
HAI
2025-05-20 22:52:41 -07:00
committed by GitHub
parent 30ca18f423
commit 5c0b38f369
9 changed files with 552 additions and 23 deletions

View File

@@ -4,6 +4,7 @@ from sglang.test.test_utils import CustomTestCase, is_in_ci, run_bench_one_batch
class TestDummyGrok1(CustomTestCase):
def test_dummy_grok_1(self):
output_throughput = run_bench_one_batch(
None,

View File

@@ -3,6 +3,8 @@ Usage:
python -m unittest test_eval_accuracy_large.TestEvalAccuracyLarge.test_mmlu
"""
import os
import time
import unittest
from types import SimpleNamespace
@@ -35,6 +37,11 @@ class TestEvalAccuracyLarge(CustomTestCase):
def tearDownClass(cls):
    """Pass the PID of ``cls.process`` to ``kill_process_tree``."""
    launched_pid = cls.process.pid
    kill_process_tree(launched_pid)
def tearDown(self):
    """Pause after each test when running in AMD CI.

    If the ``SGLANG_AMD_CI`` environment variable equals ``"1"``, sleep for
    180 seconds; otherwise return immediately.
    """
    running_in_amd_ci = os.getenv("SGLANG_AMD_CI") == "1"
    if not running_in_amd_ci:
        return
    # Delay between tests to allow GPU memory cleanup
    time.sleep(180)
def test_mmlu(self):
args = SimpleNamespace(
base_url=self.base_url,