From c25231c6792e3e867f73f0f2213e86699637a73d Mon Sep 17 00:00:00 2001 From: shangmingc Date: Wed, 28 May 2025 15:40:26 +0800 Subject: [PATCH] [CI] Fix flaky pp single node test (#6689) Signed-off-by: Shangming Cai --- test/srt/test_pp_single_node.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/srt/test_pp_single_node.py b/test/srt/test_pp_single_node.py index 51cc98108..efd894fab 100644 --- a/test/srt/test_pp_single_node.py +++ b/test/srt/test_pp_single_node.py @@ -108,11 +108,13 @@ class TestQwenPPAccuracy(unittest.TestCase): print(f"[Qwen PP Comparison] Baseline: {baseline} | PP: {pp_metrics}") - self.assertAlmostEqual( + self.assertGreaterEqual( pp_metrics["accuracy"], - baseline["accuracy"], - delta=0.01, - msg=f"PP accuracy exceeds 1% (baseline: {baseline['accuracy']}, pp: {pp_metrics['accuracy']})", + baseline["accuracy"] - 0.01, + msg=( + f"PP accuracy dropped more than 1% compared to baseline. " + f"Baseline: {baseline['accuracy']:.2%}, PP: {pp_metrics['accuracy']:.2%}" + ), ) @@ -164,11 +166,13 @@ class TestQwenPPTieWeightsAccuracy(unittest.TestCase): print(f"[Qwen PP Comparison] Baseline: {baseline} | PP: {pp_metrics}") - self.assertAlmostEqual( + self.assertGreaterEqual( pp_metrics["accuracy"], - baseline["accuracy"], - delta=0.01, - msg=f"PP accuracy exceeds 1% (baseline: {baseline['accuracy']}, pp: {pp_metrics['accuracy']})", + baseline["accuracy"] - 0.01, + msg=( + f"PP accuracy dropped more than 1% compared to baseline. " + f"Baseline: {baseline['accuracy']:.2%}, PP: {pp_metrics['accuracy']:.2%}" + ), )