Support Prometheus metrics (#1853)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from contextlib import contextmanager
|
||||
from enum import Enum, auto
|
||||
@@ -306,6 +307,7 @@ class PrefillAdder:
|
||||
):
|
||||
# Non-chunked prefill
|
||||
self.can_run_list.append(req)
|
||||
req.queued_time = time.time()
|
||||
self.tree_cache.inc_lock_ref(req.last_node)
|
||||
self._prefill_one_req(
|
||||
prefix_len,
|
||||
@@ -324,6 +326,7 @@ class PrefillAdder:
|
||||
req.extend_input_len = trunc_len
|
||||
req.fill_ids = req.fill_ids[: len(req.prefix_indices) + trunc_len]
|
||||
self.can_run_list.append(req)
|
||||
req.queued_time = time.time()
|
||||
self.new_inflight_req = req
|
||||
self.tree_cache.inc_lock_ref(req.last_node)
|
||||
self._prefill_one_req(prefix_len, trunc_len, 0)
|
||||
|
||||
Reference in New Issue
Block a user