From f1cf6eefbec615bfec1c026f29c0f5bb06f00ba6 Mon Sep 17 00:00:00 2001
From: Chen Shengzhi
Date: Thu, 13 Mar 2025 12:28:48 +0800
Subject: [PATCH] [Fix] Check the device backend before calling empty_cache
 function (#4212)

---
 python/sglang/srt/distributed/parallel_state.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py
index c507f4701..47872b6b9 100644
--- a/python/sglang/srt/distributed/parallel_state.py
+++ b/python/sglang/srt/distributed/parallel_state.py
@@ -1228,7 +1228,16 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
         ray.shutdown()
     gc.collect()
     if not current_platform.is_cpu():
-        torch.cuda.empty_cache()
+        if hasattr(torch, "cuda") and torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            if hasattr(torch._C, "_host_emptyCache"):
+                torch._C._host_emptyCache()
+            else:
+                logger.warning(
+                    "torch._C._host_emptyCache() only available in Pytorch >=2.5"
+                )
+        elif hasattr(torch, "xpu") and torch.xpu.is_available():
+            torch.xpu.empty_cache()
 
 
 def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[bool]: