support prometheus metrics (#1853)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Lzhang-hub
2024-11-06 12:42:53 +08:00
committed by GitHub
parent f5113e50ae
commit a146d9990e
7 changed files with 526 additions and 3 deletions

View File

@@ -31,6 +31,7 @@ ScheduleBatch -> ModelWorkerBatch -> ForwardBatch
import dataclasses
import logging
import time
from typing import List, Optional, Tuple, Union
import torch
@@ -254,6 +255,16 @@ class Req:
# For Qwen2-VL
self.mrope_position_delta = [] # use mutable object
# Lifetime traces
# time when request is created and added to waitlist
self.created_time = None
# time when request is added to prefill batch
self.queued_time = None
# time when request is being processed
self.started_time = None
# time when request is finished
self.finished_time = None
# whether request reached finished condition
def finished(self) -> bool:
    """Return True once a finish reason has been recorded for this request.

    The request counts as finished exactly when ``self.finished_reason``
    is something other than ``None``.
    """
    reason = self.finished_reason
    return reason is not None
@@ -1028,6 +1039,9 @@ class ScheduleBatch:
f"#req={(len(self.reqs))})"
)
def mark_reqs_started(self):
    """Stamp every request in this batch with a shared start time.

    The clock is read once via ``time.time()`` and the same value is
    written to ``started_time`` on each request in ``self.reqs``, so all
    requests of one batch report an identical start timestamp and the
    loop does not repeat the clock call per request.
    """
    # Read the clock once: the whole batch starts together.
    now = time.time()
    for req in self.reqs:
        req.started_time = now
@dataclasses.dataclass
class ModelWorkerBatch: