feat: add priority based scheduling with priority based request acceptance and preemption (#8746)
This commit is contained in:
@@ -149,6 +149,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
bootstrap_room=request.bootstrap_room,
|
||||
return_hidden_states=request.return_hidden_states,
|
||||
rid=request.rid,
|
||||
priority=request.priority,
|
||||
customer_labels=customer_labels,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user