[CI] test chunked prefill more (#5798)

2025-04-28 10:57:17 -07:00
parent d73ddeb196
commit 849c83a0c0
15 changed files with 212 additions and 97 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -975,7 +975,7 @@ class ModelRunner:
        after_mem = get_available_gpu_memory(self.device, self.gpu_id)
        logger.info(
            f"Capture cuda graph end. Time elapsed: {time.time() - tic:.2f} s. "
-            f"avail mem={after_mem:.2f} GB. mem usage={(before_mem - after_mem):.2f} GB."
+            f"mem usage={(before_mem - after_mem):.2f} GB. avail mem={after_mem:.2f} GB."
        )

    def apply_torch_tp(self):
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -426,7 +426,7 @@ class ServerArgs:
        parser.add_argument(
            "--skip-tokenizer-init",
            action="store_true",
-            help="If set, skip init tokenizer and pass input_ids in generate request",
+            help="If set, skip init tokenizer and pass input_ids in generate request.",
        )
        parser.add_argument(
            "--enable-tokenizer-batch-encode",
@@ -565,6 +565,7 @@ class ServerArgs:
            "name, a tag name, or a commit id. If unspecified, will use "
            "the default version.",
        )
+
        # Memory and scheduling
        parser.add_argument(
            "--mem-fraction-static",