[CI] test chunked prefill more (#5798)

2025-04-28 10:57:17 -07:00
parent d73ddeb196
commit 849c83a0c0
15 changed files with 212 additions and 97 deletions
--- a/test/srt/test_torch_native_attention_backend.py
+++ b/test/srt/test_torch_native_attention_backend.py
@@ -13,23 +13,11 @@ from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
-    is_in_ci,
    popen_launch_server,
-    run_bench_one_batch,
 )


 class TestTorchNativeAttnBackend(CustomTestCase):
-    def test_latency(self):
-        output_throughput = run_bench_one_batch(
-            DEFAULT_MODEL_NAME_FOR_TEST,
-            ["--attention-backend", "torch_native"],
-        )
-
-        if is_in_ci():
-            # Torch native backend is expected to be slower
-            self.assertGreater(output_throughput, 40)
-
    def test_mmlu(self):
        model = DEFAULT_MODEL_NAME_FOR_TEST
        base_url = DEFAULT_URL_FOR_TEST