Add support for nvidia modelopt fp8 kv cache (#3223)

This commit is contained in:
Zhiyu
2025-02-21 15:04:58 -08:00
committed by GitHub
parent 20b765a26e
commit c66b2c9cf1
4 changed files with 65 additions and 2 deletions

View File

@@ -0,0 +1,29 @@
import unittest
from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
from sglang.srt.layers.quantization.modelopt_quant import (
ModelOptFp8Config,
ModelOptFp8KVCacheMethod,
)
class TestModelOptFp8KVCacheMethod(unittest.TestCase):
    """Construction checks for ``ModelOptFp8KVCacheMethod``."""

    def test_kv_cache_method_initialization(self):
        """Build the KV cache method from a ModelOpt FP8 config and verify
        both its base class and the config it exposes."""
        # Config constructed with is_checkpoint_fp8_serialized=True.
        config = ModelOptFp8Config(is_checkpoint_fp8_serialized=True)
        method = ModelOptFp8KVCacheMethod(config)

        # The new method must be a BaseKVCacheMethod.
        self.assertIsInstance(method, BaseKVCacheMethod)
        # The config passed to the constructor must be readable back from
        # the instance's quant_config attribute.
        self.assertEqual(method.quant_config, config)
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()