@@ -62,7 +62,10 @@ class TestBenchOneBatch(CustomTestCase):
|
||||
f"### test_torch_compile_tp2_bs1 (Mixtral-8x7B)\n"
|
||||
f"output_throughput: {output_throughput:.2f} token/s\n"
|
||||
)
|
||||
self.assertGreater(output_throughput, 220)
|
||||
if os.getenv("SGLANG_AMD_CI") == "1":
|
||||
self.assertGreater(output_throughput, 200)
|
||||
else:
|
||||
self.assertGreater(output_throughput, 220)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -32,7 +32,7 @@ class TestBenchServing(CustomTestCase):
|
||||
f'Output throughput: {res["output_throughput"]:.2f} token/s\n'
|
||||
)
|
||||
if os.getenv("SGLANG_AMD_CI") == "1":
|
||||
self.assertGreater(res["output_throughput"], 3500)
|
||||
self.assertGreater(res["output_throughput"], 3150)
|
||||
else:
|
||||
self.assertGreater(res["output_throughput"], 3800)
|
||||
|
||||
@@ -70,7 +70,7 @@ class TestBenchServing(CustomTestCase):
|
||||
f'Output throughput: {res["output_throughput"]:.2f} token/s\n'
|
||||
)
|
||||
if os.getenv("SGLANG_AMD_CI") == "1":
|
||||
self.assertGreater(res["output_throughput"], 3500)
|
||||
self.assertGreater(res["output_throughput"], 3050)
|
||||
else:
|
||||
self.assertGreater(res["output_throughput"], 3800)
|
||||
|
||||
@@ -126,7 +126,7 @@ class TestBenchServing(CustomTestCase):
|
||||
f'Output throughput: {res["output_throughput"]:.2f} token/s\n'
|
||||
)
|
||||
if os.getenv("SGLANG_AMD_CI") == "1":
|
||||
self.assertGreater(res["output_throughput"], 4000)
|
||||
self.assertGreater(res["output_throughput"], 3500)
|
||||
else:
|
||||
self.assertGreater(res["output_throughput"], 4300)
|
||||
|
||||
|
||||
@@ -37,11 +37,6 @@ class TestEvalAccuracyLarge(CustomTestCase):
|
||||
def tearDownClass(cls):
|
||||
kill_process_tree(cls.process.pid)
|
||||
|
||||
def tearDown(self):
|
||||
# Delay between tests to allow GPU memory cleanup
|
||||
if os.getenv("SGLANG_AMD_CI") == "1":
|
||||
time.sleep(180)
|
||||
|
||||
def test_mmlu(self):
|
||||
args = SimpleNamespace(
|
||||
base_url=self.base_url,
|
||||
|
||||
@@ -90,9 +90,9 @@ class TestDeepseekV3MTP(CustomTestCase):
|
||||
"2",
|
||||
"--speculative-num-draft-tokens",
|
||||
"4",
|
||||
"--mem-fraction-static",
|
||||
"0.7",
|
||||
]
|
||||
if os.environ.get("SGLANG_AMD_CI") != "1":
|
||||
other_args += ["--mem-frac", "0.7"]
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
|
||||
Reference in New Issue
Block a user