[CI] test chunked prefill more (#5798)
This commit is contained in:
@@ -26,7 +26,7 @@ class TestDummyGrok1(CustomTestCase):
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
-assert output_throughput > 0, f"{output_throughput=}"
|
||||
+self.assertGreater(output_throughput, 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -64,7 +64,7 @@ class TestVLMModels(CustomTestCase):
|
||||
model = "openai_compatible"
|
||||
tp = 1
|
||||
tasks = "mmmu_val"
|
||||
-batch_size = 1
|
||||
+batch_size = 2
|
||||
log_suffix = "openai_compatible"
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
@@ -125,6 +125,9 @@ class TestVLMModels(CustomTestCase):
|
||||
"--chat-template",
|
||||
model.chat_template,
|
||||
"--trust-remote-code",
|
||||
+"--cuda-graph-max-bs",
|
||||
+"32",
|
||||
+"--enable-multimodal",
|
||||
"--mem-fraction-static",
|
||||
str(self.parsed_args.mem_fraction_static), # Use class variable
|
||||
],
|
||||
@@ -171,7 +174,7 @@ if __name__ == "__main__":
|
||||
"--mem-fraction-static",
|
||||
type=float,
|
||||
help="Static memory fraction for the model",
|
||||
-default=0.6,
|
||||
+default=0.8,
|
||||
)
|
||||
|
||||
# Parse args intended for unittest
|
||||
|
||||
Reference in New Issue
Block a user