model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)

Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
2025-08-17 11:48:15 +03:00
parent e47800e176
commit 845d12a979
6 changed files with 465 additions and 5 deletions
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -231,11 +231,14 @@ class HFRunner:

        # Load the model and tokenizer
        if self.model_type == "generation":
-            config = AutoConfig.from_pretrained(model_path)
-            if model_archs := getattr(config, "architectures"):
-                model_cls = getattr(transformers, model_archs[0])
-            else:
+            config = AutoConfig.from_pretrained(
+                model_path, trust_remote_code=self.trust_remote_code
+            )
+            if self.trust_remote_code:
                model_cls = AutoModelForCausalLM
+            else:
+                model_arch = getattr(config, "architectures")[0]
+                model_cls = getattr(transformers, model_arch)
            self.base_model = model_cls.from_pretrained(
                model_path,
                torch_dtype=torch_dtype,