[PD] Propagate internal server errors from aborted requests to clients instead of blindly returning 200s (#8936)
This commit is contained in:
@@ -259,7 +259,7 @@ class DecodePreallocQueue:
|
||||
if len(req.origin_input_ids) > self.max_total_num_tokens:
|
||||
message = f"Request {req.rid} exceeds the maximum number of tokens: {len(req.origin_input_ids)} > {self.max_total_num_tokens}"
|
||||
logger.error(message)
|
||||
- prepare_abort(req, message)
|
||||
+ prepare_abort(req, message, status_code=HTTPStatus.BAD_REQUEST)
|
||||
self.scheduler.stream_output([req], req.return_logprob)
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -178,7 +178,7 @@ class PrefillBootstrapQueue:
|
||||
if len(req.origin_input_ids) > self.max_total_num_tokens:
|
||||
message = f"Request {req.rid} exceeds the maximum number of tokens: {len(req.origin_input_ids)} > {self.max_total_num_tokens}"
|
||||
logger.error(message)
|
||||
- prepare_abort(req, message)
|
||||
+ prepare_abort(req, message, status_code=HTTPStatus.BAD_REQUEST)
|
||||
self.scheduler.stream_output([req], req.return_logprob)
|
||||
return True
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user