Support nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8/NVFP4 (#11866)

2025-10-23 12:29:02 +03:00
parent 36a4cad7b0
commit d6fee73d1f
10 changed files with 207 additions and 127 deletions
--- a/test/srt/layers/attention/mamba/test_mamba_ssm.py
+++ b/test/srt/layers/attention/mamba/test_mamba_ssm.py
@@ -1,5 +1,6 @@
 # Adapted from https://github.com/vllm-project/vllm/blob/633f943e30a4444d890d26b81850f7217736f840/tests/kernels/mamba/test_mamba_ssm_ssd.py

+
 import pytest
 import torch
 import torch.nn.functional as F
@@ -289,3 +290,7 @@ def test_selective_state_update_with_heads_with_batch_indices(
    print(f"Output mean diff: {(out - out_ref).abs().mean().item()}")
    assert torch.allclose(state[state_indices, :], state_ref, rtol=rtol, atol=atol)
    assert torch.allclose(out, out_ref, rtol=rtol, atol=atol)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])