Clean up import vllm in quantization/__init__.py (#4834)

2025-03-28 10:34:10 -07:00
parent ef9a378a20
commit 74e0ac1dbd
14 changed files with 191 additions and 254 deletions
--- a/test/srt/test_triton_attention_backend.py
+++ b/test/srt/test_triton_attention_backend.py
@@ -28,7 +28,7 @@ class TestTritonAttnBackend(CustomTestCase):
                "triton",
                "--enable-torch-compile",
                "--cuda-graph-max-bs",
-                16,
+                4,
            ],
        )