Refactor the docs (#9031)

This commit is contained in:
Lianmin Zheng
2025-08-10 19:49:45 -07:00
committed by GitHub
parent 0f229c07f1
commit 2449a0afe2
80 changed files with 619 additions and 750 deletions

View File

@@ -32,16 +32,20 @@ from sglang.lang.choices import (
token_length_normalized,
unconditional_likelihood_normalized,
)
from sglang.srt.entrypoints.engine import Engine
# Lazy import some libraries
from sglang.utils import LazyImport
from sglang.version import __version__
# Language-frontend backends are heavy optional dependencies; bind them
# lazily so `import sglang` stays fast when they are unused.
Anthropic = LazyImport("sglang.lang.backend.anthropic", "Anthropic")
LiteLLM = LazyImport("sglang.lang.backend.litellm", "LiteLLM")
OpenAI = LazyImport("sglang.lang.backend.openai", "OpenAI")
VertexAI = LazyImport("sglang.lang.backend.vertexai", "VertexAI")

# Runtime Engine APIs (also lazy, to avoid pulling in the server stack at
# import time). NOTE: the original text assigned ServerArgs twice verbatim;
# the redundant duplicate assignment is removed here.
ServerArgs = LazyImport("sglang.srt.server_args", "ServerArgs")
Engine = LazyImport("sglang.srt.entrypoints.engine", "Engine")
__all__ = [
"Engine",
"Runtime",

View File

@@ -2175,10 +2175,6 @@ class ServerArgs:
self.mem_fraction_static = (
original_server_arg_mem_fraction * final_overall_factor
)
logger.warning(
f"Multimodal model: Dynamically adjusted --mem-fraction-static "
f"from: {original_server_arg_mem_fraction:.3f} to: {self.mem_fraction_static:.3f}."
)
def prepare_server_args(argv: List[str]) -> ServerArgs:

View File

@@ -0,0 +1,59 @@
"""
Do some monkey patch to make the documentation compilation faster and more reliable.
- Avoid port conflicts
- Reduce the server launch time
"""
import weakref
import nest_asyncio
nest_asyncio.apply()
import sglang.srt.server_args as server_args_mod
from sglang.utils import execute_shell_command, reserve_port
# Conservative resource caps so several doc-build servers can coexist on one machine.
DEFAULT_MAX_RUNNING_REQUESTS = 128
DEFAULT_MAX_TOTAL_TOKENS = 20480  # To allow multiple servers on the same machine

# Capture the stock __post_init__ BEFORE it is replaced below, so the patched
# version can still delegate to it. Statement order here is load-bearing.
_original_post_init = server_args_mod.ServerArgs.__post_init__


def patched_post_init(self):
    """Run the original ServerArgs.__post_init__, then apply doc-build caps.

    max_running_requests / max_total_tokens are only filled in when the user
    left them unset (None); cuda_graph_max_bs is always forced to 4 —
    presumably to keep CUDA-graph memory small in CI (TODO confirm).
    """
    _original_post_init(self)
    if self.max_running_requests is None:
        self.max_running_requests = DEFAULT_MAX_RUNNING_REQUESTS
    if self.max_total_tokens is None:
        self.max_total_tokens = DEFAULT_MAX_TOTAL_TOKENS
    self.cuda_graph_max_bs = 4


# Monkey-patch: every ServerArgs constructed after this import gets the caps.
server_args_mod.ServerArgs.__post_init__ = patched_post_init

# Maps server process -> its port-reservation lock socket. Weak keys mean an
# entry (and thus the socket reference) disappears once the process object is
# garbage-collected.
process_socket_map = weakref.WeakKeyDictionary()
def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
    """Launch the server using the given command.

    If no port is specified, a free port is reserved; the reservation's lock
    socket is kept alive in ``process_socket_map`` for the lifetime of the
    spawned process so no other launcher grabs the same port.

    Args:
        command: Base shell command that starts the server.
        host: Interface used when reserving a free port.
        port: Explicit port to use; when None, a free port is reserved.

    Returns:
        Tuple ``(process, port)`` — the spawned process handle and the port
        the server was told to listen on.
    """
    if port is None:
        port, lock_socket = reserve_port(host)
    else:
        lock_socket = None

    # Mirror the ServerArgs monkey patch on the command line as well, so the
    # caps apply even if the patched __post_init__ is not in effect.
    extra_flags = (
        f"--max-running-requests {DEFAULT_MAX_RUNNING_REQUESTS} "
        f"--max-total-tokens {DEFAULT_MAX_TOTAL_TOKENS} "
        "--cuda-graph-max-bs 4"  # plain literal: no placeholder needed
    )

    full_command = f"{command} --port {port} {extra_flags}"
    try:
        process = execute_shell_command(full_command)
    except Exception:
        # Fix: don't leak the reserved-port socket when the launch fails.
        if lock_socket is not None:
            lock_socket.close()
        raise

    if lock_socket is not None:
        # Tie the lock socket's lifetime to the process object (weak-key map).
        process_socket_map[process] = lock_socket
    return process, port

View File

@@ -458,7 +458,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
NOTE: Typically, the server runs in a separate terminal.
In this notebook, we run the server and notebook code together, so their outputs are combined.
To improve clarity, the server logs are displayed in the original black color, while the notebook outputs are highlighted in blue.
We are running those notebooks in a CI parallel environment, so the throughput is not representative of the actual performance.
We are running those notebooks in a CI environment, so the throughput is not representative of the actual performance.
"""
)
break