feat: add tp_rank, pp_rank and dp_rank labels for scheduler metrics (#7597)

Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
Yingchun Lai
2025-07-17 08:55:59 +08:00
committed by GitHub
parent 4395c87a9b
commit 795668dc73
3 changed files with 33 additions and 15 deletions

View File

@@ -105,6 +105,7 @@ class ServerArgs:
crash_dump_folder: Optional[str] = None
show_time_cost: bool = False
enable_metrics: bool = False
enable_metrics_for_all_schedulers: bool = False
bucket_time_to_first_token: Optional[List[float]] = None
bucket_e2e_request_latency: Optional[List[float]] = None
bucket_inter_token_latency: Optional[List[float]] = None
@@ -1002,6 +1003,13 @@ class ServerArgs:
action="store_true",
help="Enable log prometheus metrics.",
)
# Opt-in boolean CLI switch: with action="store_true" the parsed value is
# False unless the flag is given on the command line. Per its help text, it
# asks schedulers on every TP rank (not only TP 0) to record request metrics
# separately.
# NOTE(review): presumably only meaningful together with --enable-metrics;
# that dependency is not visible in this hunk -- confirm against the caller.
parser.add_argument(
"--enable-metrics-for-all-schedulers",
action="store_true",
help="Enable --enable-metrics-for-all-schedulers when you want schedulers on all TP ranks (not just TP 0) "
"to record request metrics separately. This is especially useful when dp_attention is enabled, as "
"otherwise all metrics appear to come from TP 0.",
)
parser.add_argument(
"--bucket-time-to-first-token",
type=float,