Support incremental streaming of logprob/token_ids between scheduler and detokenizer (#6225)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-05-12 14:33:38 -07:00
committed by GitHub
parent f1c896007a
commit d18c6b3358
9 changed files with 257 additions and 86 deletions

View File

@@ -98,6 +98,7 @@ class ServerArgs:
show_time_cost: bool = False
enable_metrics: bool = False
# Log interval of the decode batch (see the matching CLI help text).
decode_log_interval: int = 40
# Opt-in switch for per-request time stats logging; disabled by default.
enable_request_time_stats_logging: bool = False
# API related
api_key: Optional[str] = None
@@ -785,6 +786,12 @@ class ServerArgs:
default=ServerArgs.decode_log_interval,
help="The log interval of decode batch.",
)
# Register the opt-in switch for per-request time stats logging.
# With action="store_true", passing the flag sets the value to True;
# when the flag is omitted, the default declared on ServerArgs is used.
parser.add_argument(
"--enable-request-time-stats-logging",
action="store_true",
default=ServerArgs.enable_request_time_stats_logging,
help="Enable per request time stats logging",
)
# API related
parser.add_argument(