[Feature] SPMD for SGLang + Verl (#3852)

2025-03-01 01:53:10 +08:00
parent bac414ab53
commit e3e0bc50a9
19 changed files with 890 additions and 202 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -82,6 +82,7 @@ class ServerArgs:
    dist_timeout: Optional[int] = None  # timeout for torch.distributed
    download_dir: Optional[str] = None
    base_gpu_id: int = 0
+    gpu_id_step: int = 1

    # Logging
    log_level: str = "info"
@@ -552,6 +553,12 @@ class ServerArgs:
            default=ServerArgs.base_gpu_id,
            help="The base GPU ID to start allocating GPUs from. Useful when running multiple instances on the same machine.",
        )
+        parser.add_argument(
+            "--gpu-id-step",
+            type=int,
+            default=ServerArgs.gpu_id_step,
+            help="The delta between consecutive GPU IDs that are used. For example, setting it to 2 will use GPU 0,2,4,...",
+        )

        # Logging
        parser.add_argument(
@@ -957,6 +964,7 @@ class ServerArgs:
            and (self.lora_paths is None or self.disable_radix_cache)
        ), "compatibility of lora and cuda graph and radix attention is in progress"
        assert self.base_gpu_id >= 0, "base_gpu_id must be non-negative"
+        assert self.gpu_id_step >= 1, "gpu_id_step must be positive"

        if isinstance(self.lora_paths, list):
            lora_paths = self.lora_paths