Update deps for mllama4 (#5215)
This commit is contained in:
@@ -38,7 +38,7 @@ runtime_common = [
|
||||
"pyzmq>=25.1.2",
|
||||
"soundfile==0.13.1",
|
||||
"torchao>=0.7.0",
|
||||
-"transformers==4.51.0",
|
||||
+"transformers==4.51.1",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"compressed-tensors",
|
||||
@@ -50,6 +50,7 @@ srt = [
|
||||
"sgl-kernel==0.0.8",
|
||||
"flashinfer_python==0.2.3",
|
||||
"torch==2.5.1",
|
||||
+"torchvision==0.20.1",
|
||||
"cuda-python",
|
||||
"outlines>=0.0.44,<=0.1.11",
|
||||
"partial_json_parser",
|
||||
|
||||
@@ -840,7 +840,6 @@ class Scheduler(
|
||||
bootstrap_room=recv_req.bootstrap_room,
|
||||
)
|
||||
req.tokenizer = self.tokenizer
|
||||
-req.queue_time_start = time.time()
|
||||
|
||||
if (
|
||||
recv_req.session_params is not None
|
||||
@@ -855,7 +854,6 @@ class Scheduler(
|
||||
# Create a new request from a previous session
|
||||
session = self.sessions[recv_req.session_params.id]
|
||||
req = session.create_req(recv_req, self.tokenizer)
|
||||
-req.queue_time_start = time.time()
|
||||
if isinstance(req.finished_reason, FINISH_ABORT):
|
||||
self._add_request_to_queue(req)
|
||||
return
|
||||
@@ -958,6 +956,7 @@ class Scheduler(
|
||||
self.disagg_decode_prealloc_queue.add(req)
|
||||
|
||||
else:
|
||||
+req.queue_time_start = time.time()
|
||||
self.waiting_queue.append(req)
|
||||
|
||||
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
|
||||
|
||||
Reference in New Issue
Block a user