Auto-detect device if not specified in server arguments. (#4423)
This commit is contained in:
committed by
GitHub
parent
22c96f78a6
commit
bf63ee54ed
@@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file
|
|||||||
from sglang.srt.reasoning_parser import ReasoningParser
|
from sglang.srt.reasoning_parser import ReasoningParser
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
get_amdgpu_memory_capacity,
|
get_amdgpu_memory_capacity,
|
||||||
|
get_device,
|
||||||
get_hpu_memory_capacity,
|
get_hpu_memory_capacity,
|
||||||
get_nvgpu_memory_capacity,
|
get_nvgpu_memory_capacity,
|
||||||
is_cuda,
|
is_cuda,
|
||||||
@@ -52,7 +53,7 @@ class ServerArgs:
|
|||||||
quantization: Optional[str] = None
|
quantization: Optional[str] = None
|
||||||
quantization_param_path: nullable_str = None
|
quantization_param_path: nullable_str = None
|
||||||
context_length: Optional[int] = None
|
context_length: Optional[int] = None
|
||||||
device: str = "cuda"
|
device: Optional[str] = None
|
||||||
served_model_name: Optional[str] = None
|
served_model_name: Optional[str] = None
|
||||||
chat_template: Optional[str] = None
|
chat_template: Optional[str] = None
|
||||||
is_embedding: bool = False
|
is_embedding: bool = False
|
||||||
@@ -185,6 +186,9 @@ class ServerArgs:
|
|||||||
if self.tokenizer_path is None:
|
if self.tokenizer_path is None:
|
||||||
self.tokenizer_path = self.model_path
|
self.tokenizer_path = self.model_path
|
||||||
|
|
||||||
|
if self.device is None:
|
||||||
|
self.device = get_device()
|
||||||
|
|
||||||
if self.served_model_name is None:
|
if self.served_model_name is None:
|
||||||
self.served_model_name = self.model_path
|
self.served_model_name = self.model_path
|
||||||
|
|
||||||
@@ -435,9 +439,8 @@ class ServerArgs:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--device",
|
"--device",
|
||||||
type=str,
|
type=str,
|
||||||
default="cuda",
|
default=ServerArgs.device,
|
||||||
choices=["cuda", "xpu", "hpu", "cpu"],
|
help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.",
|
||||||
help="The device type.",
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--served-model-name",
|
"--served-model-name",
|
||||||
|
|||||||
Reference in New Issue
Block a user