Fuse more ops & Simplify token mapping (#1758)

This commit is contained in:
Lianmin Zheng
2024-10-22 23:20:43 -07:00
committed by GitHub
parent 17536e7e3d
commit ad4125d1a9
9 changed files with 99 additions and 75 deletions

View File

@@ -31,6 +31,7 @@ class TestEvalAccuracyMini(unittest.TestCase):
eval_name="mmlu",
num_examples=64,
num_threads=32,
temperature=0.1,
)
metrics = run_eval(args)

View File

@@ -23,7 +23,7 @@ class TestPyTorchSamplingBackend(unittest.TestCase):
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-other_args=["--sampling-backend", "pytorch"],
+other_args=["--sampling-backend", "pytorch", "--disable-radix-cache"],
)
@classmethod
@@ -37,6 +37,7 @@ class TestPyTorchSamplingBackend(unittest.TestCase):
eval_name="mmlu",
num_examples=64,
num_threads=32,
temperature=0.1,
)
metrics = run_eval(args)