model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)

Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
This commit is contained in:
Netanel Haber
2025-08-17 11:48:15 +03:00
committed by GitHub
parent e47800e176
commit 845d12a979
6 changed files with 465 additions and 5 deletions

View File

@@ -231,11 +231,14 @@ class HFRunner:
# Load the model and tokenizer
if self.model_type == "generation":
config = AutoConfig.from_pretrained(model_path)
if model_archs := getattr(config, "architectures"):
model_cls = getattr(transformers, model_archs[0])
else:
config = AutoConfig.from_pretrained(
model_path, trust_remote_code=self.trust_remote_code
)
if self.trust_remote_code:
model_cls = AutoModelForCausalLM
else:
model_arch = getattr(config, "architectures")[0]
model_cls = getattr(transformers, model_arch)
self.base_model = model_cls.from_pretrained(
model_path,
torch_dtype=torch_dtype,