Support server-based rollout in VerlEngine (#4848)

Co-authored-by: Jin Pan <jpan236@wisc.edu>
Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: Jinn <47354855+jhinpan@users.noreply.github.com>
This commit is contained in:
tianlian yi
2025-04-13 01:07:52 +08:00
committed by GitHub
parent 3e4794aad8
commit bc92107b03
10 changed files with 720 additions and 29 deletions

View File

@@ -700,10 +700,17 @@ class UpdateWeightsFromDistributedReqOutput:
@dataclass
class UpdateWeightsFromTensorReqInput:
    """Update model weights from tensor input.

    - Tensors are serialized for transmission
    - Data is structured in JSON for easy transmission over HTTP
    """

    # List containing one serialized Dict[str, torch.Tensor] per TP worker.
    # Each entry may be raw bytes or a str.
    # NOTE(review): the str form is presumably a text-safe encoding of the
    # same serialized payload so it can travel in a JSON body — confirm
    # against the caller/receiver.
    serialized_named_tensors: List[Union[str, bytes]]
    # Optional format specification for loading
    load_format: Optional[str] = None
    # Whether to flush the cache after updating weights
    flush_cache: bool = True
@dataclass