Support nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8/NVFP4 (#11866)

2025-10-23 12:29:02 +03:00
parent 36a4cad7b0
commit d6fee73d1f
10 changed files with 207 additions and 127 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -19,6 +19,9 @@ suites = {
        TestFile("hicache/test_hicache_eagle.py", 150),
        TestFile("hicache/test_hicache_mla.py", 127),
        TestFile("hicache/test_hicache_storage.py", 127),
+        TestFile("layers/attention/mamba/test_causal_conv1d.py", 25),
+        TestFile("layers/attention/mamba/test_mamba_ssm.py", 50),
+        TestFile("layers/attention/mamba/test_mamba_ssm_ssd.py", 70),
        TestFile("lora/test_lora.py", 200),
        TestFile("lora/test_lora_eviction.py", 200),
        TestFile("lora/test_lora_eviction_policy.py", 200),
@@ -34,7 +37,7 @@ suites = {
        TestFile("models/test_embedding_models.py", 73),
        TestFile("models/test_encoder_embedding_models.py", 460),
        TestFile("models/test_generation_models.py", 103),
-        TestFile("models/test_nvidia_nemotron_nano_v2.py", 180),
+        TestFile("models/test_nvidia_nemotron_nano_v2.py", 300),
        TestFile("models/test_qwen_models.py", 82),
        TestFile("batch_invariant/test_batch_invariant_ops.py", 10),
        TestFile("models/test_reward_models.py", 132),
@@ -143,7 +146,7 @@ suites = {
        TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
        TestFile("hicache/test_hicache_storage_file_backend.py", 200),
        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
-        TestFile("layers/attention/mamba/test_mamba2_mixer.py", 110),
+        TestFile("layers/attention/mamba/test_mamba2_mixer.py", 50),
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("models/test_glm4_moe_models.py", 100),
        TestFile("rl/test_update_weights_from_distributed.py", 103),