From bf63ee54ed6f5561f23a403f5267e97d41eeb650 Mon Sep 17 00:00:00 2001 From: vikram singh shekhawat Date: Sun, 16 Mar 2025 09:43:51 +0530 Subject: [PATCH] Auto-detect device if not specified in server arguments. (#4423) --- python/sglang/srt/server_args.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index d6cb878b9..554c9592d 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file from sglang.srt.reasoning_parser import ReasoningParser from sglang.srt.utils import ( get_amdgpu_memory_capacity, + get_device, get_hpu_memory_capacity, get_nvgpu_memory_capacity, is_cuda, @@ -52,7 +53,7 @@ class ServerArgs: quantization: Optional[str] = None quantization_param_path: nullable_str = None context_length: Optional[int] = None - device: str = "cuda" + device: Optional[str] = None served_model_name: Optional[str] = None chat_template: Optional[str] = None is_embedding: bool = False @@ -185,6 +186,9 @@ class ServerArgs: if self.tokenizer_path is None: self.tokenizer_path = self.model_path + if self.device is None: + self.device = get_device() + if self.served_model_name is None: self.served_model_name = self.model_path @@ -435,9 +439,8 @@ class ServerArgs: parser.add_argument( "--device", type=str, - default="cuda", - choices=["cuda", "xpu", "hpu", "cpu"], - help="The device type.", + default=ServerArgs.device, + help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.", ) parser.add_argument( "--served-model-name",