Improve weight loading and code style (#3174)

This commit is contained in:
Lianmin Zheng
2025-01-27 03:00:41 -08:00
committed by GitHub
parent 351a72d40b
commit 53cef81587
11 changed files with 171 additions and 65 deletions

View File

@@ -75,6 +75,7 @@ class ServerArgs:
# Other runtime options
tp_size: int = 1
stream_interval: int = 1
stream_output: bool = False
random_seed: Optional[int] = None
constrained_json_whitespace_pattern: Optional[str] = None
watchdog_timeout: float = 300
@@ -500,6 +501,11 @@ class ServerArgs:
default=ServerArgs.stream_interval,
help="The interval (or buffer size) for streaming in terms of the token length. A smaller value makes streaming smoother, while a larger value makes the throughput higher",
)
parser.add_argument(
"--stream-output",
action="store_true",
help="Whether to output as a sequence of disjoint segments.",
)
parser.add_argument(
"--random-seed",
type=int,