model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)
Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
This commit is contained in:
@@ -231,11 +231,14 @@ class HFRunner:
|
||||
|
||||
# Load the model and tokenizer
|
||||
if self.model_type == "generation":
|
||||
config = AutoConfig.from_pretrained(model_path)
|
||||
if model_archs := getattr(config, "architectures"):
|
||||
model_cls = getattr(transformers, model_archs[0])
|
||||
else:
|
||||
config = AutoConfig.from_pretrained(
|
||||
model_path, trust_remote_code=self.trust_remote_code
|
||||
)
|
||||
if self.trust_remote_code:
|
||||
model_cls = AutoModelForCausalLM
|
||||
else:
|
||||
model_arch = getattr(config, "architectures")[0]
|
||||
model_cls = getattr(transformers, model_arch)
|
||||
self.base_model = model_cls.from_pretrained(
|
||||
model_path,
|
||||
torch_dtype=torch_dtype,
|
||||
|
||||
Reference in New Issue
Block a user