[PD metrics] Add latency Histogram metrics of each stage for generate requests (#8710)

This commit is contained in:
Yingchun Lai
2025-09-16 01:52:49 +08:00
committed by GitHub
parent 57234d0c9c
commit b1721edbac
7 changed files with 77 additions and 11 deletions

View File

@@ -116,6 +116,7 @@ from sglang.srt.managers.schedule_batch import (
FINISH_ABORT,
MultimodalInputs,
Req,
RequestStage,
ScheduleBatch,
global_server_args_dict,
)
@@ -1232,6 +1233,9 @@ class Scheduler(
bootstrap_room=recv_req.bootstrap_room,
data_parallel_rank=recv_req.data_parallel_rank,
vocab_size=self.model_config.vocab_size,
metrics_collector=(
self.metrics_collector if self.enable_metrics else None
),
)
req.tokenizer = self.tokenizer
@@ -1768,6 +1772,7 @@ class Scheduler(
# only record queue time when enable_metrics is True to avoid overhead
for req in can_run_list:
req.queue_time_end = time.perf_counter()
req.add_latency(RequestStage.PREFILL_WAITING)
self.waiting_queue = [
x for x in self.waiting_queue if x not in set(can_run_list)