Log if cuda graph is used & extend cuda graph capture to cuda-graph-max-bs (#6201)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -395,12 +395,12 @@ def popen_launch_server(
|
||||
other_args: list[str] = (),
|
||||
env: Optional[dict] = None,
|
||||
return_stdout_stderr: Optional[tuple] = None,
|
||||
-pd_seperated: bool = False,
|
||||
+pd_separated: bool = False,
|
||||
):
|
||||
_, host, port = base_url.split(":")
|
||||
host = host[2:]
|
||||
|
||||
-if pd_seperated:
|
||||
+if pd_separated:
|
||||
command = "sglang.launch_pd_server"
|
||||
else:
|
||||
command = "sglang.launch_server"
|
||||
@@ -414,7 +414,7 @@ def popen_launch_server(
|
||||
*[str(x) for x in other_args],
|
||||
]
|
||||
|
||||
-if pd_seperated:
|
||||
+if pd_separated:
|
||||
command.extend(
|
||||
[
|
||||
"--lb-host",
|
||||
@@ -656,7 +656,7 @@ def get_benchmark_args(
|
||||
disable_stream=False,
|
||||
disable_ignore_eos=False,
|
||||
seed: int = 0,
|
||||
-pd_seperated: bool = False,
|
||||
+pd_separated: bool = False,
|
||||
):
|
||||
return SimpleNamespace(
|
||||
backend="sglang",
|
||||
@@ -686,7 +686,7 @@ def get_benchmark_args(
|
||||
profile=None,
|
||||
lora_name=None,
|
||||
prompt_suffix="",
|
||||
-pd_seperated=pd_seperated,
|
||||
+pd_separated=pd_separated,
|
||||
)
|
||||
|
||||
|
||||
@@ -750,7 +750,7 @@ def run_bench_serving_multi(
|
||||
other_server_args,
|
||||
benchmark_args,
|
||||
need_warmup=False,
|
||||
-pd_seperated=False,
|
||||
+pd_separated=False,
|
||||
):
|
||||
# Launch the server
|
||||
process = popen_launch_server(
|
||||
@@ -758,7 +758,7 @@ def run_bench_serving_multi(
|
||||
base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=other_server_args,
|
||||
-pd_seperated=pd_seperated,
|
||||
+pd_separated=pd_separated,
|
||||
)
|
||||
|
||||
# run benchmark for all
|
||||
|
||||
Reference in New Issue
Block a user