Support nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8/NVFP4 (#11866)

This commit is contained in:
Netanel Haber
2025-10-23 12:29:02 +03:00
committed by GitHub
parent 36a4cad7b0
commit d6fee73d1f
10 changed files with 207 additions and 127 deletions

View File

@@ -1,7 +1,7 @@
import unittest
from types import SimpleNamespace
from sglang.srt.utils import kill_process_tree
from sglang.srt.utils import is_blackwell, kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -12,9 +12,11 @@ from sglang.test.test_utils import (
class TestNvidiaNemotronNanoV2(CustomTestCase):
model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
accuracy = 0.87
@classmethod
def setUpClass(cls):
cls.model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
@@ -42,7 +44,18 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
)
metrics = run_eval(args)
print(f"{metrics=}")
self.assertGreater(metrics["accuracy"], 0.87)
self.assertGreaterEqual(metrics["accuracy"], self.accuracy)
class TestNvidiaNemotronNanoV2FP8(TestNvidiaNemotronNanoV2):
    """Run the inherited Nemotron Nano v2 test against the FP8 checkpoint.

    Only class-level settings are overridden here; server launch and the
    accuracy assertion come from ``TestNvidiaNemotronNanoV2``.
    """

    # Model identifier that the inherited setUpClass hands to the server
    # launcher via ``cls.model``.
    model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8"
    # Minimum value the inherited test requires of ``metrics["accuracy"]``.
    accuracy = 0.87
@unittest.skipIf(
    not is_blackwell(),
    "NVFP4 only supported on blackwell",
)
class TestNvidiaNemotronNanoV2NVFP4(TestNvidiaNemotronNanoV2):
    """Run the inherited Nemotron Nano v2 test against the NVFP4 checkpoint.

    The whole class is skipped when ``is_blackwell()`` returns a falsy value.
    Only class-level settings are overridden; the test logic is inherited
    from ``TestNvidiaNemotronNanoV2``.
    """

    # Model identifier that the inherited setUpClass hands to the server
    # launcher via ``cls.model``.
    model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4"
    # Minimum value the inherited test requires of ``metrics["accuracy"]``;
    # set lower than the 0.87 used by the other variants in this file.
    accuracy = 0.855
if __name__ == "__main__":