Log if cuda graph is used & extend cuda graph capture to cuda-graph-max-bs (#6201)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
2025-05-12 00:17:33 -07:00
parent 7d3a3d4510
commit fba8eccd7e
27 changed files with 293 additions and 121 deletions
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -395,12 +395,12 @@ def popen_launch_server(
    other_args: list[str] = (),
    env: Optional[dict] = None,
    return_stdout_stderr: Optional[tuple] = None,
-    pd_seperated: bool = False,
+    pd_separated: bool = False,
 ):
    _, host, port = base_url.split(":")
    host = host[2:]

-    if pd_seperated:
+    if pd_separated:
        command = "sglang.launch_pd_server"
    else:
        command = "sglang.launch_server"
@@ -414,7 +414,7 @@ def popen_launch_server(
        *[str(x) for x in other_args],
    ]

-    if pd_seperated:
+    if pd_separated:
        command.extend(
            [
                "--lb-host",
@@ -656,7 +656,7 @@ def get_benchmark_args(
    disable_stream=False,
    disable_ignore_eos=False,
    seed: int = 0,
-    pd_seperated: bool = False,
+    pd_separated: bool = False,
 ):
    return SimpleNamespace(
        backend="sglang",
@@ -686,7 +686,7 @@ def get_benchmark_args(
        profile=None,
        lora_name=None,
        prompt_suffix="",
-        pd_seperated=pd_seperated,
+        pd_separated=pd_separated,
    )


@@ -750,7 +750,7 @@ def run_bench_serving_multi(
    other_server_args,
    benchmark_args,
    need_warmup=False,
-    pd_seperated=False,
+    pd_separated=False,
 ):
    # Launch the server
    process = popen_launch_server(
@@ -758,7 +758,7 @@ def run_bench_serving_multi(
        base_url,
        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
        other_args=other_server_args,
-        pd_seperated=pd_seperated,
+        pd_separated=pd_separated,
    )

    # run benchmark for all