Improve profiler and integrate profiler in bench_one_batch_server (#6787)

This commit is contained in:
Lianmin Zheng
2025-05-31 15:53:55 -07:00
committed by GitHub
parent b520d02888
commit 2d72fc47cf
25 changed files with 481 additions and 223 deletions

View File

@@ -24,8 +24,8 @@ class TestMLA(CustomTestCase):
other_args=[
"--trust-remote-code",
"--enable-torch-compile",
"--cuda-graph-max-bs",
"2",
"--torch-compile-max-bs",
"4",
"--chunked-prefill-size",
"256",
],
@@ -35,18 +35,6 @@ class TestMLA(CustomTestCase):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)
def test_mmlu(self):
args = SimpleNamespace(
base_url=self.base_url,
model=self.model,
eval_name="mmlu",
num_examples=64,
num_threads=32,
)
metrics = run_eval(args)
self.assertGreater(metrics["score"], 0.5)
def test_mgsm_en(self):
args = SimpleNamespace(
base_url=self.base_url,