From bf63ee54ed6f5561f23a403f5267e97d41eeb650 Mon Sep 17 00:00:00 2001
From: vikram singh shekhawat <vshekhawat@habana.ai>
Date: Sun, 16 Mar 2025 09:43:51 +0530
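Subject: [PATCH] Auto-detect device if not specified in server arguments.
 (#4423)

`ServerArgs.device` now defaults to `None` instead of "cuda". When it is
still `None`, it is filled in with the value returned by `get_device()`
(imported from `sglang.srt.utils`), alongside the existing fallbacks for
`tokenizer_path` and `served_model_name`.

The `--device` CLI option now takes its default from `ServerArgs.device`
and no longer passes an argparse `choices` list, so values other than
'cuda', 'xpu', 'hpu' and 'cpu' are no longer rejected at parse time.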
---
 python/sglang/srt/server_args.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index d6cb878b9..554c9592d 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file
 from sglang.srt.reasoning_parser import ReasoningParser
 from sglang.srt.utils import (
     get_amdgpu_memory_capacity,
+    get_device,
     get_hpu_memory_capacity,
     get_nvgpu_memory_capacity,
     is_cuda,
@@ -52,7 +53,7 @@ class ServerArgs:
     quantization: Optional[str] = None
     quantization_param_path: nullable_str = None
     context_length: Optional[int] = None
-    device: str = "cuda"
+    device: Optional[str] = None
     served_model_name: Optional[str] = None
     chat_template: Optional[str] = None
     is_embedding: bool = False
@@ -185,6 +186,9 @@ class ServerArgs:
         if self.tokenizer_path is None:
             self.tokenizer_path = self.model_path
 
+        if self.device is None:
+            self.device = get_device()
+
         if self.served_model_name is None:
             self.served_model_name = self.model_path
 
@@ -435,9 +439,8 @@ class ServerArgs:
         parser.add_argument(
             "--device",
             type=str,
-            default="cuda",
-            choices=["cuda", "xpu", "hpu", "cpu"],
-            help="The device type.",
+            default=ServerArgs.device,
+            help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.",
         )
         parser.add_argument(
             "--served-model-name",