aiter attention-backend (default enabled on AMD/ROCm) (#6381)

This commit is contained in:
HAI
2025-05-20 22:52:41 -07:00
committed by GitHub
parent 30ca18f423
commit 5c0b38f369
9 changed files with 552 additions and 23 deletions

View File

@@ -3,6 +3,8 @@ Usage:
python -m unittest test_eval_accuracy_large.TestEvalAccuracyLarge.test_mmlu
"""
import os
import time
import unittest
from types import SimpleNamespace
@@ -35,6 +37,11 @@ class TestEvalAccuracyLarge(CustomTestCase):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def tearDown(self):
    """Pause after each test on AMD CI so GPU memory can be cleaned up.

    The pause only happens when the ``SGLANG_AMD_CI`` environment
    variable is exactly ``"1"``; otherwise this hook returns immediately.
    """
    running_on_amd_ci = os.environ.get("SGLANG_AMD_CI") == "1"
    if not running_on_amd_ci:
        return
    # Delay between tests to allow GPU memory cleanup.
    time.sleep(180)
def test_mmlu(self):
args = SimpleNamespace(
base_url=self.base_url,