[OAI] patch origin request_id logic (#7508)

This commit is contained in:
ybyang
2025-06-25 11:09:38 +08:00
committed by GitHub
parent 57ab776910
commit 03c039c48e
5 changed files with 19 additions and 5 deletions

View File

@@ -196,6 +196,9 @@ class CompletionRequest(BaseModel):
     bootstrap_port: Optional[int] = None
     bootstrap_room: Optional[int] = None
+    # For request id
+    rid: Optional[Union[List[str], str]] = None
     @field_validator("max_tokens")
     @classmethod
     def validate_max_tokens_positive(cls, v):
@@ -430,8 +433,8 @@ class ChatCompletionRequest(BaseModel):
     stream_reasoning: bool = True
     chat_template_kwargs: Optional[Dict] = None
-    # The request id.
-    rid: Optional[str] = None
+    # For request id
+    rid: Optional[Union[List[str], str]] = None
     # For PD disaggregation
     bootstrap_host: Optional[str] = None
@@ -529,7 +532,7 @@ class EmbeddingRequest(BaseModel):
     user: Optional[str] = None
     # The request id.
-    rid: Optional[str] = None
+    rid: Optional[Union[List[str], str]] = None
 class EmbeddingObject(BaseModel):

View File

@@ -95,6 +95,7 @@ class OpenAIServingChat(OpenAIServingBase):
             bootstrap_port=request.bootstrap_port,
             bootstrap_room=request.bootstrap_room,
             return_hidden_states=request.return_hidden_states,
+            rid=request.rid,
         )
         return adapted_request, request

View File

@@ -87,6 +87,7 @@ class OpenAIServingCompletion(OpenAIServingBase):
             bootstrap_port=request.bootstrap_port,
             bootstrap_room=request.bootstrap_room,
             return_hidden_states=request.return_hidden_states,
+            rid=request.rid,
         )
         return adapted_request, request

View File

@@ -119,6 +119,7 @@ class OpenAIServingEmbedding(OpenAIServingBase):
         adapted_request = EmbeddingReqInput(
             **prompt_kwargs,
+            rid=request.rid,
         )
         return adapted_request, request

View File

@@ -319,8 +319,16 @@ class GenerateReqInput:
         """Normalize request IDs for batch processing."""
         if self.rid is None:
             self.rid = [uuid.uuid4().hex for _ in range(num)]
-        elif not isinstance(self.rid, list):
-            raise ValueError("The rid should be a list for batch processing.")
+        elif isinstance(self.rid, str):
+            new_rids = [f"{self.rid}_{i}" for i in range(num)]
+            self.rid = new_rids
+        elif isinstance(self.rid, list):
+            if len(self.rid) != num:
+                raise ValueError(
+                    "The specified rids length mismatch with the batch_size for batch processing."
+                )
+        else:
+            raise ValueError("The rid should be a string or a list of strings.")
     def _normalize_logprob_params(self, num):
         """Normalize logprob-related parameters for batch processing."""