From a11f8d5f6a80595cd90982b369284a5b87d50163 Mon Sep 17 00:00:00 2001 From: Xiaotong Jiang Date: Sun, 29 Dec 2024 22:49:41 -0800 Subject: [PATCH] [feat] Add math eval to CI (#2652) --- test/srt/test_eval_accuracy_large.py | 11 +++++++++++ test/srt/test_eval_accuracy_mini.py | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/test/srt/test_eval_accuracy_large.py b/test/srt/test_eval_accuracy_large.py index f7fb3cec3..bda6053ee 100644 --- a/test/srt/test_eval_accuracy_large.py +++ b/test/srt/test_eval_accuracy_large.py @@ -68,6 +68,17 @@ class TestEvalAccuracyLarge(unittest.TestCase): metrics = run_eval(args) self.assertGreater(metrics["score"], 0.835) + def test_math(self): + args = SimpleNamespace( + base_url=self.base_url, + model=self.model, + eval_name="math", + num_examples=5000, + num_threads=1024 + ) + + metrics = run_eval(args) + self.assertGreaterEqual(metrics["score"], 0.519 - 0.01) # -1% to account for sampling variance if __name__ == "__main__": unittest.main() diff --git a/test/srt/test_eval_accuracy_mini.py b/test/srt/test_eval_accuracy_mini.py index a008c3869..74741aba5 100644 --- a/test/srt/test_eval_accuracy_mini.py +++ b/test/srt/test_eval_accuracy_mini.py @@ -37,6 +37,18 @@ class TestEvalAccuracyMini(unittest.TestCase): metrics = run_eval(args) self.assertGreaterEqual(metrics["score"], 0.65) + def test_math(self): + args = SimpleNamespace( + base_url=self.base_url, + model=self.model, + eval_name="math", + num_examples=64, + num_threads=32, + temperature=0.1, + ) + metrics = run_eval(args) + self.assertGreaterEqual(metrics["score"], 0.519 - 0.03) # -3% to account for sampling variance + if __name__ == "__main__": unittest.main()