Add initial support for Intel Gaudi accelerators (#2121)
This commit is contained in:
@@ -31,6 +31,9 @@ srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
|
|||||||
# xpu is not enabled in public vllm and torch whl,
|
# xpu is not enabled in public vllm and torch whl,
|
||||||
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
# need to follow https://docs.vllm.ai/en/latest/getting_started/xpu-installation.html to install vllm
|
||||||
srt_xpu = ["sglang[runtime_common]"]
|
srt_xpu = ["sglang[runtime_common]"]
|
||||||
|
# For Intel Gaudi (device: hpu), follow the installation guide:
|
||||||
|
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
|
||||||
|
srt_hpu = ["sglang[runtime_common]"]
|
||||||
|
|
||||||
openai = ["openai>=1.0", "tiktoken"]
|
openai = ["openai>=1.0", "tiktoken"]
|
||||||
anthropic = ["anthropic>=0.20.0"]
|
anthropic = ["anthropic>=0.20.0"]
|
||||||
@@ -46,9 +49,11 @@ test = [
|
|||||||
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
all_hip = ["sglang[srt_hip]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
all_xpu = ["sglang[srt_xpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
|
all_hpu = ["sglang[srt_hpu]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
|
||||||
dev = ["sglang[all]", "sglang[test]"]
|
dev = ["sglang[all]", "sglang[test]"]
|
||||||
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
dev_hip = ["sglang[all_hip]", "sglang[test]"]
|
||||||
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
|
dev_xpu = ["sglang[all_xpu]", "sglang[test]"]
|
||||||
|
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
"Homepage" = "https://github.com/sgl-project/sglang"
|
"Homepage" = "https://github.com/sgl-project/sglang"
|
||||||
|
|||||||
@@ -278,10 +278,7 @@ def correctness_test(
|
|||||||
|
|
||||||
|
|
||||||
def synchronize(device):
|
def synchronize(device):
|
||||||
if device == "cuda":
|
torch.get_device_module(device).synchronize()
|
||||||
torch.cuda.synchronize()
|
|
||||||
elif device == "xpu":
|
|
||||||
torch.xpu.synchronize()
|
|
||||||
|
|
||||||
|
|
||||||
def latency_test_run_once(
|
def latency_test_run_once(
|
||||||
|
|||||||
@@ -176,14 +176,15 @@ class ModelRunner:
|
|||||||
def init_torch_distributed(self):
|
def init_torch_distributed(self):
|
||||||
logger.info("Init torch distributed begin.")
|
logger.info("Init torch distributed begin.")
|
||||||
# Init torch distributed
|
# Init torch distributed
|
||||||
|
torch.get_device_module(self.device).set_device(self.gpu_id)
|
||||||
if self.device == "cuda":
|
if self.device == "cuda":
|
||||||
torch.cuda.set_device(self.gpu_id)
|
|
||||||
backend = "nccl"
|
backend = "nccl"
|
||||||
# TODO(liangan1): Just use gloo to bypass the initialization failure
|
# TODO(liangan1): Just use gloo to bypass the initialization failure
|
||||||
# Need to use xccl for xpu backend in the future
|
# Need to use xccl for xpu backend in the future
|
||||||
elif self.device == "xpu":
|
elif self.device == "xpu":
|
||||||
torch.xpu.set_device(self.gpu_id)
|
|
||||||
backend = "gloo"
|
backend = "gloo"
|
||||||
|
elif self.device == "hpu":
|
||||||
|
backend = "hccl"
|
||||||
|
|
||||||
if not self.server_args.enable_p2p_check:
|
if not self.server_args.enable_p2p_check:
|
||||||
monkey_patch_vllm_p2p_access_check(self.gpu_id)
|
monkey_patch_vllm_p2p_access_check(self.gpu_id)
|
||||||
|
|||||||
@@ -306,7 +306,7 @@ class ServerArgs:
|
|||||||
"--device",
|
"--device",
|
||||||
type=str,
|
type=str,
|
||||||
default="cuda",
|
default="cuda",
|
||||||
choices=["cuda", "xpu"],
|
choices=["cuda", "xpu", "hpu"],
|
||||||
help="The device type.",
|
help="The device type.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
Reference in New Issue
Block a user