Support incremental streaming of logprob/token_ids between scheduler and detokenizer (#6225)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -98,6 +98,7 @@ class ServerArgs:
|
||||
show_time_cost: bool = False
|
||||
enable_metrics: bool = False
|
||||
decode_log_interval: int = 40
|
||||
enable_request_time_stats_logging: bool = False
|
||||
|
||||
# API related
|
||||
api_key: Optional[str] = None
|
||||
@@ -785,6 +786,12 @@ class ServerArgs:
|
||||
default=ServerArgs.decode_log_interval,
|
||||
help="The log interval of decode batch.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-request-time-stats-logging",
|
||||
action="store_true",
|
||||
default=ServerArgs.enable_request_time_stats_logging,
|
||||
help="Enable per request time stats logging",
|
||||
)
|
||||
|
||||
# API related
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user