diff --git a/python/sglang/srt/distributed/parallel_state.py b/python/sglang/srt/distributed/parallel_state.py index c507f4701..47872b6b9 100644 --- a/python/sglang/srt/distributed/parallel_state.py +++ b/python/sglang/srt/distributed/parallel_state.py @@ -1228,7 +1228,16 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False): ray.shutdown() gc.collect() if not current_platform.is_cpu(): - torch.cuda.empty_cache() + if hasattr(torch, "cuda") and torch.cuda.is_available(): + torch.cuda.empty_cache() + if hasattr(torch._C, "_host_emptyCache"): + torch._C._host_emptyCache() + else: + logger.warning( + "torch._C._host_emptyCache() only available in Pytorch >=2.5" + ) + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + torch.xpu.empty_cache() def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[bool]: