Support updating weights at once by stopping all requests (#6698)

Signed-off-by: Tianyu Zhou <albert.zty@antgroup.com>
Co-authored-by: Zilin Zhu <zhuzilinallen@gmail.com>
This commit is contained in:
@@ -662,7 +662,9 @@ async def configure_logging(obj: ConfigureLoggingReq, request: Request):
 async def abort_request(obj: AbortReq, request: Request):
     """Abort a request."""
     try:
-        _global_state.tokenizer_manager.abort_request(rid=obj.rid)
+        _global_state.tokenizer_manager.abort_request(
+            rid=obj.rid, abort_all=obj.abort_all
+        )
         return Response(status_code=200)
     except Exception as e:
         return _create_error_response(e)
|
||||
@@ -236,7 +236,7 @@ class CompletionResponseStreamChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter", "abort"]] = None
     matched_stop: Union[None, int, str] = None
     hidden_states: Optional[object] = None
|
||||
@@ -510,7 +510,9 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     delta: DeltaMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
     finish_reason: Optional[
-        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
+        Literal[
+            "stop", "length", "tool_calls", "content_filter", "function_call", "abort"
+        ]
     ] = None
     matched_stop: Union[None, int, str] = None
|
||||
|
||||
Reference in New Issue
Block a user