[Fix] Not skip NVML Check on AMD Platform (#3135)

2025-01-26 13:33:51 +08:00
parent 822bae8c00
commit 66283dbc0c
2 changed files with 6 additions and 3 deletions
--- a/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
+++ b/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
@@ -185,9 +185,12 @@ class CustomAllreduce:
        # test nvlink first, this will filter out most of the cases
        # where custom allreduce is not supported
        # this checks hardware and driver support for NVLink
-        assert is_cuda()
+        if is_cuda():
+            assert is_cuda()

-        full_nvlink = is_full_nvlink(physical_device_ids)
+            full_nvlink = is_full_nvlink(physical_device_ids)
+        else:
+            full_nvlink = False
        if world_size > 2 and not full_nvlink:
            logger.warning(
                "Custom allreduce is disabled because it's not supported on"
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -73,7 +73,7 @@ def is_hip() -> bool:


 def is_cuda():
-    return hasattr(torch, "cuda") and torch.cuda.is_available()
+    return hasattr(torch, "cuda") and torch.version.cuda is not None


 def is_cuda_alike():