Improve streaming, log_level, memory report, weight loading, and benchmark script (#7632)

Co-authored-by: Kan Wu <wukanustc@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-06-29 23:16:19 -07:00
committed by GitHub
parent c5131f7a2f
commit 22352d47a9
24 changed files with 626 additions and 160 deletions

View File

@@ -1678,7 +1678,6 @@ def run_benchmark(args_: argparse.Namespace):
if args.base_url
else f"http://{args.host}:{args.port}/generate"
)
args.apply_chat_template = True
elif args.backend in ["sglang-oai", "vllm", "lmdeploy"]:
api_url = (
f"{args.base_url}/v1/completions"