model: support nvidia/Llama-3_3-Nemotron-Super-49B-v1 (#9067)

Co-authored-by: Kyle Huang <kylhuang@nvidia.com>
2025-08-17 11:48:15 +03:00
parent e47800e176
commit 845d12a979
6 changed files with 465 additions and 5 deletions
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -77,6 +77,12 @@ ALL_MODELS = [
        trust_remote_code=True,
        skip_long_prompt=True,
    ),
+    ModelCase(
+        "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5",
+        tp_size=2,
+        trust_remote_code=True,
+        skip_long_prompt=True,
+    ),
 ]

 TORCH_DTYPES = [torch.float16]