model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)
Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
This commit is contained in:
@@ -449,8 +449,10 @@ def set_cpu_offload_max_bytes(max_bytes: int) -> None:
|
||||
|
||||
|
||||
def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
|
||||
- device = next(module.parameters()).device
|
||||
+ if (params := next(module.parameters(), None)) is None:
|
||||
+ return module
|
||||
|
||||
+ device = params.device
|
||||
if device == torch.device("cpu"):
|
||||
return module
|
||||
|
||||
|
||||
Reference in New Issue
Block a user