Clean up docs for server args and sampling parameters (generated by grok) (#7076)

2025-06-10 19:55:42 -07:00
parent f2a75a66c4
commit dbdf76ca98
4 changed files with 240 additions and 188 deletions
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -87,7 +87,7 @@ class GenerateReqInput:

    # The modalities of the image data [image, multi-images, video]
    modalities: Optional[List[str]] = None
-    # LoRA related
+    # The path to the LoRA adapter (a single path or a list of paths)
    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None

    # Session info for continual prompting
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -28,7 +28,6 @@ from sglang.srt.utils import (
    configure_ipv6,
    get_device,
    get_device_memory_capacity,
-    is_cuda,
    is_flashinfer_available,
    is_hip,
    is_port_available,
@@ -214,8 +213,8 @@ class ServerArgs:
    disable_shared_experts_fusion: bool = False
    disable_chunked_prefix_cache: bool = False
    disable_fast_image_processor: bool = False
-    warmups: Optional[str] = None
    enable_return_hidden_states: bool = False
+    warmups: Optional[str] = None

    # Debug tensor dumps
    debug_tensor_dump_output_folder: Optional[str] = None
@@ -536,10 +535,16 @@ class ServerArgs:
            help="The path of the tokenizer.",
        )
        parser.add_argument(
-            "--host", type=str, default=ServerArgs.host, help="The host of the server."
+            "--host",
+            type=str,
+            default=ServerArgs.host,
+            help="The host of the HTTP server.",
        )
        parser.add_argument(
-            "--port", type=int, default=ServerArgs.port, help="The port of the server."
+            "--port",
+            type=int,
+            default=ServerArgs.port,
+            help="The port of the HTTP server.",
        )
        parser.add_argument(
            "--tokenizer-mode",
@@ -694,6 +699,18 @@ class ServerArgs:
            "name, a tag name, or a commit id. If unspecified, will use "
            "the default version.",
        )
+        parser.add_argument(
+            "--impl",
+            type=str,
+            default=ServerArgs.impl,
+            help="Which implementation of the model to use.\n\n"
+            '* "auto" will try to use the SGLang implementation if it exists '
+            "and fall back to the Transformers implementation if no SGLang "
+            "implementation is available.\n"
+            '* "sglang" will use the SGLang model implementation.\n'
+            '* "transformers" will use the Transformers model '
+            "implementation.\n",
+        )

        # Memory and scheduling
        parser.add_argument(
@@ -752,18 +769,6 @@ class ServerArgs:
            default=ServerArgs.page_size,
            help="The number of tokens in a page.",
        )
-        parser.add_argument(
-            "--impl",
-            type=str,
-            default=ServerArgs.impl,
-            help="Which implementation of the model to use.\n\n"
-            '* "auto" will try to use the SGLang implementation if it exists '
-            "and fall back to the Transformers implementation if no SGLang "
-            "implementation is available.\n"
-            '* "sglang" will use the SGLang model implementation.\n'
-            '* "transformers" will use the Transformers model '
-            "implementation.\n",
-        )

        # Other runtime options
        parser.add_argument(
@@ -1442,6 +1447,11 @@ class ServerArgs:
            action="store_true",
            help="Adopt base image processor instead of fast image processor.",
        )
+        parser.add_argument(
+            "--enable-return-hidden-states",
+            action="store_true",
+            help="Enable returning hidden states with responses.",
+        )
        parser.add_argument(
            "--warmups",
            type=str,
@@ -1469,12 +1479,6 @@ class ServerArgs:
            default=ServerArgs.debug_tensor_dump_inject,
            help="Inject the outputs from jax as the input of every layer.",
        )
-
-        parser.add_argument(
-            "--enable-return-hidden-states",
-            action="store_true",
-            help="Enable returning hidden states with responses.",
-        )
        parser.add_argument(
            "--debug-tensor-dump-prefill-only",
            action="store_true",