[PD] Fix server crash when using batch requests (#5531)
This commit is contained in:
@@ -96,8 +96,8 @@ class GenerateReqInput:
|
||||
return_hidden_states: bool = False
|
||||
|
||||
# For disaggregated inference
|
||||
- bootstrap_host: Optional[str] = None
- bootstrap_room: Optional[int] = None
+ bootstrap_host: Optional[Union[List[str], str]] = None
+ bootstrap_room: Optional[Union[List[int], int]] = None
|
||||
|
||||
def normalize_batch_and_arguments(self):
|
||||
"""
|
||||
@@ -397,6 +397,12 @@ class GenerateReqInput:
|
||||
else None
|
||||
),
|
||||
return_hidden_states=self.return_hidden_states,
|
||||
+ bootstrap_host=(
+ self.bootstrap_host[i] if self.bootstrap_host is not None else None
+ ),
+ bootstrap_room=(
+ self.bootstrap_room[i] if self.bootstrap_room is not None else None
+ ),
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user