Auto-detect device if not specified in server arguments. (#4423)

This commit is contained in:
vikram singh shekhawat
2025-03-16 09:43:51 +05:30
committed by GitHub
parent 22c96f78a6
commit bf63ee54ed

View File

@@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file
 from sglang.srt.reasoning_parser import ReasoningParser
 from sglang.srt.utils import (
     get_amdgpu_memory_capacity,
+    get_device,
     get_hpu_memory_capacity,
     get_nvgpu_memory_capacity,
     is_cuda,
@@ -52,7 +53,7 @@ class ServerArgs:
     quantization: Optional[str] = None
     quantization_param_path: nullable_str = None
     context_length: Optional[int] = None
-    device: str = "cuda"
+    device: Optional[str] = None
     served_model_name: Optional[str] = None
     chat_template: Optional[str] = None
     is_embedding: bool = False
@@ -185,6 +186,9 @@ class ServerArgs:
         if self.tokenizer_path is None:
             self.tokenizer_path = self.model_path
+        if self.device is None:
+            self.device = get_device()
         if self.served_model_name is None:
             self.served_model_name = self.model_path
@@ -435,9 +439,8 @@ class ServerArgs:
         parser.add_argument(
             "--device",
             type=str,
-            default="cuda",
-            choices=["cuda", "xpu", "hpu", "cpu"],
-            help="The device type.",
+            default=ServerArgs.device,
+            help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.",
         )
         parser.add_argument(
             "--served-model-name",