Auto-detect device if not specified in server arguments. (#4423)

2025-03-16 09:43:51 +05:30
parent 22c96f78a6
commit bf63ee54ed
1 changed files with 7 additions and 4 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file
 from sglang.srt.reasoning_parser import ReasoningParser
 from sglang.srt.utils import (
    get_amdgpu_memory_capacity,
+    get_device,
    get_hpu_memory_capacity,
    get_nvgpu_memory_capacity,
    is_cuda,
@@ -52,7 +53,7 @@ class ServerArgs:
    quantization: Optional[str] = None
    quantization_param_path: nullable_str = None
    context_length: Optional[int] = None
-    device: str = "cuda"
+    device: Optional[str] = None
    served_model_name: Optional[str] = None
    chat_template: Optional[str] = None
    is_embedding: bool = False
@@ -185,6 +186,9 @@ class ServerArgs:
        if self.tokenizer_path is None:
            self.tokenizer_path = self.model_path

+        if self.device is None:
+            self.device = get_device()
+
        if self.served_model_name is None:
            self.served_model_name = self.model_path

@@ -435,9 +439,8 @@ class ServerArgs:
        parser.add_argument(
            "--device",
            type=str,
-            default="cuda",
-            choices=["cuda", "xpu", "hpu", "cpu"],
-            help="The device type.",
+            default=ServerArgs.device,
+            help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.",
        )
        parser.add_argument(
            "--served-model-name",