Revert "Support nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8/NVFP4" (#12015)

This commit is contained in:
Liangsheng Yin
2025-10-23 21:27:58 +08:00
committed by GitHub
parent 32852fe9e9
commit 6c18addb6f
10 changed files with 127 additions and 207 deletions

View File

@@ -1,7 +1,7 @@
import unittest
from types import SimpleNamespace
from sglang.srt.utils import is_blackwell, kill_process_tree
from sglang.srt.utils import kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
@@ -12,11 +12,9 @@ from sglang.test.test_utils import (
class TestNvidiaNemotronNanoV2(CustomTestCase):
model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
accuracy = 0.87
@classmethod
def setUpClass(cls):
cls.model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
@@ -44,18 +42,7 @@ class TestNvidiaNemotronNanoV2(CustomTestCase):
)
metrics = run_eval(args)
print(f"{metrics=}")
self.assertGreaterEqual(metrics["accuracy"], self.accuracy)
class TestNvidiaNemotronNanoV2FP8(TestNvidiaNemotronNanoV2):
accuracy = 0.87
model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8"
@unittest.skipIf(not is_blackwell(), "NVFP4 only supported on blackwell")
class TestNvidiaNemotronNanoV2NVFP4(TestNvidiaNemotronNanoV2):
accuracy = 0.855
model = "nvidia/NVIDIA-Nemotron-Nano-9B-v2-NVFP4"
self.assertGreater(metrics["accuracy"], 0.87)
if __name__ == "__main__":