feat: add priority-based scheduling with priority-based request acceptance and preemption (#8746)
This commit is contained in:
@@ -228,6 +228,8 @@ class CompletionRequest(BaseModel):
|
||||
|
||||
# For request id
|
||||
rid: Optional[Union[List[str], str]] = None
|
||||
# Priority for the request
|
||||
priority: Optional[int] = None
|
||||
|
||||
# For customer metric labels
|
||||
customer_labels: Optional[Dict[str, str]] = None
|
||||
@@ -543,6 +545,8 @@ class ChatCompletionRequest(BaseModel):
|
||||
|
||||
# For request id
|
||||
rid: Optional[Union[List[str], str]] = None
|
||||
# Priority for the request
|
||||
priority: Optional[int] = None
|
||||
|
||||
# For PD disaggregation
|
||||
bootstrap_host: Optional[Union[List[str], str]] = None
|
||||
@@ -644,6 +648,8 @@ class EmbeddingRequest(BaseModel):
|
||||
|
||||
# The request id.
|
||||
rid: Optional[Union[List[str], str]] = None
|
||||
# Priority for the request
|
||||
priority: Optional[int] = None
|
||||
|
||||
|
||||
class EmbeddingObject(BaseModel):
|
||||
|
||||
@@ -149,6 +149,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
bootstrap_room=request.bootstrap_room,
|
||||
return_hidden_states=request.return_hidden_states,
|
||||
rid=request.rid,
|
||||
priority=request.priority,
|
||||
customer_labels=customer_labels,
|
||||
)
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ class OpenAIServingCompletion(OpenAIServingBase):
|
||||
bootstrap_room=request.bootstrap_room,
|
||||
return_hidden_states=request.return_hidden_states,
|
||||
rid=request.rid,
|
||||
priority=request.priority,
|
||||
customer_labels=customer_labels,
|
||||
)
|
||||
|
||||
|
||||
@@ -125,6 +125,7 @@ class OpenAIServingEmbedding(OpenAIServingBase):
|
||||
adapted_request = EmbeddingReqInput(
|
||||
**prompt_kwargs,
|
||||
rid=request.rid,
|
||||
priority=request.priority,
|
||||
)
|
||||
|
||||
return adapted_request, request
|
||||
|
||||
Reference in New Issue
Block a user