Support Prometheus metrics (#1853)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com> Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
@@ -31,6 +31,7 @@ ScheduleBatch -> ModelWorkerBatch -> ForwardBatch
|
||||
|
||||
import dataclasses
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
@@ -254,6 +255,16 @@ class Req:
|
||||
# For Qwen2-VL
|
||||
self.mrope_position_delta = [] # use mutable object
|
||||
|
||||
# Lifetime traces
|
||||
# time when request is created and added to waitlist
|
||||
self.created_time = None
|
||||
# time when request is added to prefill batch
|
||||
self.queued_time = None
|
||||
# time when request is being processed
|
||||
self.started_time = None
|
||||
# time when request is finished
|
||||
self.finished_time = None
|
||||
|
||||
# whether request reached finished condition
|
||||
def finished(self) -> bool:
    """Report whether this request has reached a finish condition.

    Returns:
        True when ``self.finished_reason`` is set (not ``None``),
        False otherwise.
    """
    reason = self.finished_reason
    return reason is not None
|
||||
@@ -1028,6 +1039,9 @@ class ScheduleBatch:
|
||||
f"#req={(len(self.reqs))})"
|
||||
)
|
||||
|
||||
def mark_reqs_started(self):
    """Stamp every request in ``self.reqs`` with a shared start time.

    The wall clock is read once, before the loop, so that all requests
    marked by the same call carry an identical ``started_time`` instead
    of values that drift by however long the loop takes to run.
    """
    # Single clock read: the timestamp is loop-invariant.
    started_at = time.time()
    for req in self.reqs:
        req.started_time = started_at
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ModelWorkerBatch:
|
||||
|
||||
Reference in New Issue
Block a user