Clean up import vllm in quantization/__init__.py (#4834)

This commit is contained in:
Lianmin Zheng
2025-03-28 10:34:10 -07:00
committed by GitHub
parent ef9a378a20
commit 74e0ac1dbd
14 changed files with 191 additions and 254 deletions

View File

@@ -45,7 +45,7 @@ class TestEAGLEEngine(CustomTestCase):
"mem_fraction_static": 0.7,
"cuda_graph_max_bs": 4,
}
-NUM_CONFIGS = 3
+NUM_CONFIGS = 2
def setUp(self):
self.prompt = "Today is a sunny day and I like"
@@ -61,8 +61,6 @@ class TestEAGLEEngine(CustomTestCase):
configs = [
# Basic config
self.BASE_CONFIG,
-# Disable cuda graph
-{**self.BASE_CONFIG, "disable_cuda_graph": True},
# Chunked prefill
{**self.BASE_CONFIG, "chunked_prefill_size": 4},
]

View File

@@ -28,7 +28,7 @@ class TestTritonAttnBackend(CustomTestCase):
"triton",
"--enable-torch-compile",
"--cuda-graph-max-bs",
-16,
+4,
],
)