Support nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8/NVFP4 (#11866)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.srt.utils import is_blackwell, kill_process_tree
|
||||
from sglang.test.few_shot_gsm8k import run_eval
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
@@ -12,9 +12,11 @@ from sglang.test.test_utils import (
|
||||
|
||||
|
||||
class TestNvidiaNemotronNanoV2(CustomTestCase):
|
||||
model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
|
||||
accuracy = 0.87
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
@@ -42,7 +44,18 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
|
||||
)
|
||||
metrics = run_eval(args)
|
||||
print(f"{metrics=}")
|
||||
self.assertGreater(metrics["accuracy"], 0.87)
|
||||
self.assertGreaterEqual(metrics["accuracy"], self.accuracy)
|
||||
|
||||
|
||||
class TestNvidiaNemotronNanoV2FP8(TestNvidiaNemotronNanoV2):
    """FP8 variant of the Nemotron Nano 9B v2 test case.

    Defines no methods of its own: only the class attributes below are
    overridden, and everything else comes from TestNvidiaNemotronNanoV2.
    """

    # Model identifier that replaces the base class's `model`.
    model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8"
    # Threshold read as `self.accuracy` by the inherited accuracy assertion.
    accuracy = 0.87
|
||||
|
||||
|
||||
# Skip the whole class unless is_blackwell() reports true at import time.
@unittest.skipUnless(is_blackwell(), "NVFP4 only supported on blackwell")
class TestNvidiaNemotronNanoV2NVFP4(TestNvidiaNemotronNanoV2):
    """NVFP4 variant of the Nemotron Nano 9B v2 test case.

    Defines no methods of its own: only the class attributes below are
    overridden, and everything else comes from TestNvidiaNemotronNanoV2.
    """

    # Model identifier that replaces the base class's `model`.
    model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4"
    # Threshold read as `self.accuracy` by the inherited accuracy assertion;
    # set lower here than the 0.87 used by the sibling test classes.
    accuracy = 0.855
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user