Clean up docs for server args and sampling parameters (generated by grok) (#7076)
This commit is contained in:
@@ -87,7 +87,7 @@ class GenerateReqInput:
|
||||
|
||||
# The modalities of the image data [image, multi-images, video]
|
||||
modalities: Optional[List[str]] = None
|
||||
# LoRA related
|
||||
# The path to the LoRA adapter (a single path, or a list of paths)
|
||||
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
|
||||
|
||||
# Session info for continual prompting
|
||||
|
||||
@@ -28,7 +28,6 @@ from sglang.srt.utils import (
|
||||
configure_ipv6,
|
||||
get_device,
|
||||
get_device_memory_capacity,
|
||||
is_cuda,
|
||||
is_flashinfer_available,
|
||||
is_hip,
|
||||
is_port_available,
|
||||
@@ -214,8 +213,8 @@ class ServerArgs:
|
||||
disable_shared_experts_fusion: bool = False
|
||||
disable_chunked_prefix_cache: bool = False
|
||||
disable_fast_image_processor: bool = False
|
||||
warmups: Optional[str] = None
|
||||
enable_return_hidden_states: bool = False
|
||||
warmups: Optional[str] = None
|
||||
|
||||
# Debug tensor dumps
|
||||
debug_tensor_dump_output_folder: Optional[str] = None
|
||||
@@ -536,10 +535,16 @@ class ServerArgs:
|
||||
help="The path of the tokenizer.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host", type=str, default=ServerArgs.host, help="The host of the server."
|
||||
"--host",
|
||||
type=str,
|
||||
default=ServerArgs.host,
|
||||
help="The host of the HTTP server.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port", type=int, default=ServerArgs.port, help="The port of the server."
|
||||
"--port",
|
||||
type=int,
|
||||
default=ServerArgs.port,
|
||||
help="The port of the HTTP server.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-mode",
|
||||
@@ -694,6 +699,18 @@ class ServerArgs:
|
||||
"name, a tag name, or a commit id. If unspecified, will use "
|
||||
"the default version.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--impl",
|
||||
type=str,
|
||||
default=ServerArgs.impl,
|
||||
help="Which implementation of the model to use.\n\n"
|
||||
'* "auto" will try to use the SGLang implementation if it exists '
|
||||
"and fall back to the Transformers implementation if no SGLang "
|
||||
"implementation is available.\n"
|
||||
'* "sglang" will use the SGLang model implementation.\n'
|
||||
'* "transformers" will use the Transformers model '
|
||||
"implementation.\n",
|
||||
)
|
||||
|
||||
# Memory and scheduling
|
||||
parser.add_argument(
|
||||
@@ -752,18 +769,6 @@ class ServerArgs:
|
||||
default=ServerArgs.page_size,
|
||||
help="The number of tokens in a page.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--impl",
|
||||
type=str,
|
||||
default=ServerArgs.impl,
|
||||
help="Which implementation of the model to use.\n\n"
|
||||
'* "auto" will try to use the SGLang implementation if it exists '
|
||||
"and fall back to the Transformers implementation if no SGLang "
|
||||
"implementation is available.\n"
|
||||
'* "sglang" will use the SGLang model implementation.\n'
|
||||
'* "transformers" will use the Transformers model '
|
||||
"implementation.\n",
|
||||
)
|
||||
|
||||
# Other runtime options
|
||||
parser.add_argument(
|
||||
@@ -1442,6 +1447,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Adopt base image processor instead of fast image processor.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-return-hidden-states",
|
||||
action="store_true",
|
||||
help="Enable returning hidden states with responses.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--warmups",
|
||||
type=str,
|
||||
@@ -1469,12 +1479,6 @@ class ServerArgs:
|
||||
default=ServerArgs.debug_tensor_dump_inject,
|
||||
help="Inject the outputs from jax as the input of every layer.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--enable-return-hidden-states",
|
||||
action="store_true",
|
||||
help="Enable returning hidden states with responses.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug-tensor-dump-prefill-only",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user