[feat] Add math eval to CI (#2652)

This commit is contained in:
Xiaotong Jiang
2024-12-29 22:49:41 -08:00
committed by GitHub
parent 098d659c0e
commit a11f8d5f6a
2 changed files with 23 additions and 0 deletions

View File

@@ -68,6 +68,17 @@ class TestEvalAccuracyLarge(unittest.TestCase):
metrics = run_eval(args)
self.assertGreater(metrics["score"], 0.835)
def test_math(self):
    """Run the "math" eval and check its score against a fixed floor.

    Builds the eval arguments from ``self.base_url`` and ``self.model``,
    hands them to ``run_eval``, and asserts that the reported ``"score"``
    is at least 0.519 minus a 0.01 allowance.
    """
    # NOTE(review): 0.519 is presumably the reference score for the model
    # under test -- confirm against the CI baseline.
    reference_score = 0.519
    # -1% to account for sampling variance.
    variance_margin = 0.01

    eval_args = SimpleNamespace(
        base_url=self.base_url,
        model=self.model,
        eval_name="math",
        num_examples=5000,
        num_threads=1024,
    )
    score = run_eval(eval_args)["score"]
    self.assertGreaterEqual(score, reference_score - variance_margin)
# Hand control to unittest's runner when this file is executed directly.
if __name__ == "__main__":
unittest.main()

View File

@@ -37,6 +37,18 @@ class TestEvalAccuracyMini(unittest.TestCase):
metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.65)
def test_math(self):
    """Run a small "math" eval and check its score against a fixed floor.

    Builds the eval arguments from ``self.base_url`` and ``self.model``
    (64 examples, 32 threads, temperature 0.1), hands them to ``run_eval``,
    and asserts that the reported ``"score"`` is at least 0.519 minus a
    0.03 allowance.
    """
    # NOTE(review): 0.519 is presumably the reference score for the model
    # under test -- confirm against the CI baseline.
    reference_score = 0.519
    # -3% to account for sampling variance.
    variance_margin = 0.03

    eval_args = SimpleNamespace(
        base_url=self.base_url,
        model=self.model,
        eval_name="math",
        num_examples=64,
        num_threads=32,
        temperature=0.1,
    )
    score = run_eval(eval_args)["score"]
    self.assertGreaterEqual(score, reference_score - variance_margin)
# Hand control to unittest's runner when this file is executed directly.
if __name__ == "__main__":
unittest.main()