Tune the threshold for accuracy tests in CI (#2071)
This commit is contained in:
@@ -42,7 +42,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.71, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.71)
|
||||
|
||||
def test_human_eval(self):
|
||||
args = SimpleNamespace(
|
||||
@@ -54,7 +54,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.64, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.64)
|
||||
|
||||
def test_mgsm_en(self):
|
||||
args = SimpleNamespace(
|
||||
@@ -66,7 +66,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.84, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.84)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -47,7 +47,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.62, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.62)
|
||||
|
||||
def test_human_eval(self):
|
||||
args = SimpleNamespace(
|
||||
@@ -59,7 +59,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.42, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.41)
|
||||
|
||||
def test_mgsm_en(self):
|
||||
args = SimpleNamespace(
|
||||
@@ -71,7 +71,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
|
||||
)
|
||||
|
||||
metrics = run_eval(args)
|
||||
assert metrics["score"] >= 0.62, f"{metrics}"
|
||||
self.assertGreater(metrics["score"], 0.62)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user