support prometheus metrics (#1853)

Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Lzhang-hub
2024-11-06 12:42:53 +08:00
committed by GitHub
parent f5113e50ae
commit a146d9990e
7 changed files with 526 additions and 3 deletions

View File

@@ -17,6 +17,7 @@ limitations under the License.
import os
import random
import time
from collections import defaultdict
from contextlib import contextmanager
from enum import Enum, auto
@@ -306,6 +307,7 @@ class PrefillAdder:
):
# Non-chunked prefill
self.can_run_list.append(req)
req.queued_time = time.time()
self.tree_cache.inc_lock_ref(req.last_node)
self._prefill_one_req(
prefix_len,
@@ -324,6 +326,7 @@ class PrefillAdder:
req.extend_input_len = trunc_len
req.fill_ids = req.fill_ids[: len(req.prefix_indices) + trunc_len]
self.can_run_list.append(req)
req.queued_time = time.time()
self.new_inflight_req = req
self.tree_cache.inc_lock_ref(req.last_node)
self._prefill_one_req(prefix_len, trunc_len, 0)