model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)

Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
This commit is contained in:
Netanel Haber
2025-08-17 11:48:15 +03:00
committed by GitHub
parent e47800e176
commit 845d12a979
6 changed files with 465 additions and 5 deletions

View File

@@ -449,8 +449,10 @@ def set_cpu_offload_max_bytes(max_bytes: int) -> None:
def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
device = next(module.parameters()).device
if (params := next(module.parameters(), None)) is None:
return module
device = params.device
if device == torch.device("cpu"):
return module