Support updating weights at once by stopping all requests (#6698)

Signed-off-by: Tianyu Zhou <albert.zty@antgroup.com>
Co-authored-by: Zilin Zhu <zhuzilinallen@gmail.com>
This commit is contained in:
Albert
2025-07-03 13:26:06 +08:00
committed by GitHub
parent b044400dd3
commit d3c275b117
7 changed files with 190 additions and 13 deletions

View File

@@ -662,7 +662,9 @@ async def configure_logging(obj: ConfigureLoggingReq, request: Request):
async def abort_request(obj: AbortReq, request: Request):
"""Abort a request."""
try:
_global_state.tokenizer_manager.abort_request(rid=obj.rid)
_global_state.tokenizer_manager.abort_request(
rid=obj.rid, abort_all=obj.abort_all
)
return Response(status_code=200)
except Exception as e:
return _create_error_response(e)

View File

@@ -236,7 +236,7 @@ class CompletionResponseStreamChoice(BaseModel):
index: int
text: str
logprobs: Optional[LogProbs] = None
finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
finish_reason: Optional[Literal["stop", "length", "content_filter", "abort"]] = None
matched_stop: Union[None, int, str] = None
hidden_states: Optional[object] = None
@@ -510,7 +510,9 @@ class ChatCompletionResponseStreamChoice(BaseModel):
delta: DeltaMessage
logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
finish_reason: Optional[
Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
Literal[
"stop", "length", "tool_calls", "content_filter", "function_call", "abort"
]
] = None
matched_stop: Union[None, int, str] = None