[NVIDIA] Add new SMs support for Spark & Thor (#11287)

Signed-off-by: Serge Panev <spanev@nvidia.com>
This commit is contained in:
Serge Panev
2025-10-21 11:02:24 -07:00
committed by GitHub
parent 97710ccd1a
commit 2b1da821b5
4 changed files with 22 additions and 8 deletions

View File

@@ -452,7 +452,15 @@ def get_available_gpu_memory(
if empty_cache:
torch.cuda.empty_cache()
free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
SHARED_SYSMEM_DEVICE_MEM_SMS = (87, 110, 121) # Orin, Thor, Spark
if get_device_sm() in SHARED_SYSMEM_DEVICE_MEM_SMS:
# On these devices, system memory doubles as device memory, so
# torch.cuda.mem_get_info() reports only strictly "free" memory. That value can
# be lower than what is actually available because it does not include cache
# memory, so we use the system's available-memory metric instead.
free_gpu_memory = psutil.virtual_memory().available
else:
free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
elif device == "xpu":
num_gpus = torch.xpu.device_count()