Add support for nvidia modelopt fp8 kv cache (#3223)

2025-02-21 15:04:58 -08:00
parent 20b765a26e
commit c66b2c9cf1
4 changed files with 65 additions and 2 deletions
--- a/test/srt/test_modelopt_fp8kvcache.py
+++ b/test/srt/test_modelopt_fp8kvcache.py
@@ -0,0 +1,29 @@
+import unittest
+
+from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
+
+from sglang.srt.layers.quantization.modelopt_quant import (
+    ModelOptFp8Config,
+    ModelOptFp8KVCacheMethod,
+)
+
+
+class TestModelOptFp8KVCacheMethod(unittest.TestCase):
+    def test_kv_cache_method_initialization(self):
+        """Build a ModelOptFp8KVCacheMethod from an FP8-serialized config and
+        verify both its base class and the config it keeps."""
+        # Config constructed with the FP8-serialized checkpoint flag enabled.
+        config = ModelOptFp8Config(is_checkpoint_fp8_serialized=True)
+
+        # Object under test, constructed from that config.
+        method = ModelOptFp8KVCacheMethod(config)
+
+        # The method object must be an instance of BaseKVCacheMethod.
+        self.assertIsInstance(method, BaseKVCacheMethod)
+
+        # The config passed to the constructor is exposed as `quant_config`.
+        self.assertEqual(method.quant_config, config)
+
+
+if __name__ == "__main__":
+    unittest.main()