Simplify prometheus metrics (#1981)

Co-authored-by: Mohit Reddy <mohitreddy1996@users.noreply.github.com>
This commit is contained in:
Lianmin Zheng
2024-11-10 04:39:32 -08:00
committed by GitHub
parent ed53ac84b4
commit 1929c06762
11 changed files with 483 additions and 632 deletions

View File

@@ -31,7 +31,6 @@ ScheduleBatch -> ModelWorkerBatch -> ForwardBatch
import dataclasses
import logging
import time
from typing import List, Optional, Tuple, Union
import torch
@@ -255,16 +254,6 @@ class Req:
# For Qwen2-VL
self.mrope_position_delta = [] # use mutable object
# Lifetime traces
# time when request is created and added to waitlist
self.created_time = None
# time when request is added to prefill batch
self.queued_time = None
# time when request is being processed
self.started_time = None
# time when request is finished
self.finished_time = None
# whether the request has reached a finish condition
def finished(self) -> bool:
    """Report whether this request has finished.

    Returns:
        True once ``finished_reason`` holds any value other than None,
        False while it is still None.
    """
    reason = self.finished_reason
    return reason is not None
@@ -1038,10 +1027,6 @@ class ScheduleBatch:
f"#req={(len(self.reqs))})"
)
def mark_reqs_started(self):
    """Stamp every request in this batch with a shared start time.

    The wall clock is read once, so each request in ``self.reqs`` receives
    the identical ``started_time`` value rather than timestamps that differ
    by however long the loop itself takes to run.
    """
    # One clock read per batch, not one per request.
    now = time.time()
    for req in self.reqs:
        req.started_time = now
@dataclasses.dataclass
class ModelWorkerBatch: