Fix request abortion (#6184)

This commit is contained in:
Lianmin Zheng
2025-05-10 21:54:46 -07:00
committed by GitHub
parent 4319978c73
commit de167cf5fa
10 changed files with 148 additions and 84 deletions

View File

@@ -3,7 +3,6 @@ Usage:
python3 test/srt/test_flashmla.py
"""
import os
import unittest
from types import SimpleNamespace
@@ -61,7 +60,7 @@ class TestFlashMLAAttnBackend(unittest.TestCase):
metrics = run_eval_few_shot_gsm8k(args)
print(metrics)
-self.assertGreater(metrics["accuracy"], 0.62)
+self.assertGreater(metrics["accuracy"], 0.60)
class TestFlashMLAAttnLatency(unittest.TestCase):