model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)

Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
2025-08-17 11:48:15 +03:00
parent e47800e176
commit 845d12a979
6 changed files with 465 additions and 5 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -449,8 +449,10 @@ def set_cpu_offload_max_bytes(max_bytes: int) -> None:


 def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
-    device = next(module.parameters()).device
+    if (params := next(module.parameters(), None)) is None:
+        return module

+    device = params.device
    if device == torch.device("cpu"):
        return module