Support server-based rollout in VerlEngine (#4848)

Co-authored-by: Jin Pan <jpan236@wisc.edu>
Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: Jinn <47354855+jhinpan@users.noreply.github.com>
2025-04-13 01:07:52 +08:00
parent 3e4794aad8
commit bc92107b03
10 changed files with 720 additions and 29 deletions
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -700,10 +700,17 @@ class UpdateWeightsFromDistributedReqOutput:

@dataclass
 class UpdateWeightsFromTensorReqInput:
-    # List containing one serialized Dict[str, torch.Tensor] per TP worker
-    serialized_named_tensors: List[bytes]
-    load_format: Optional[str]
-    flush_cache: bool
+    """Update model weights from tensor input.
+
+    - Tensors are serialized for transmission
+    - Data is structured in JSON for easy transmission over HTTP
+    """
+
+    serialized_named_tensors: List[Union[str, bytes]]
+    # Optional format specification for loading
+    load_format: Optional[str] = None
+    # Whether to flush the cache after updating weights
+    flush_cache: bool = True


@dataclass