[CI] test chunked prefill more (#5798)
This commit is contained in:
@@ -975,7 +975,7 @@ class ModelRunner:
|
||||
after_mem = get_available_gpu_memory(self.device, self.gpu_id)
|
||||
logger.info(
|
||||
f"Capture cuda graph end. Time elapsed: {time.time() - tic:.2f} s. "
|
||||
f"avail mem={after_mem:.2f} GB. mem usage={(before_mem - after_mem):.2f} GB."
|
||||
f"mem usage={(before_mem - after_mem):.2f} GB. avail mem={after_mem:.2f} GB."
|
||||
)
|
||||
|
||||
def apply_torch_tp(self):
|
||||
|
||||
@@ -426,7 +426,7 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--skip-tokenizer-init",
|
||||
action="store_true",
|
||||
help="If set, skip init tokenizer and pass input_ids in generate request",
|
||||
help="If set, skip init tokenizer and pass input_ids in generate request.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-tokenizer-batch-encode",
|
||||
@@ -565,6 +565,7 @@ class ServerArgs:
|
||||
"name, a tag name, or a commit id. If unspecified, will use "
|
||||
"the default version.",
|
||||
)
|
||||
|
||||
# Memory and scheduling
|
||||
parser.add_argument(
|
||||
"--mem-fraction-static",
|
||||
|
||||
Reference in New Issue
Block a user