Add support for NVIDIA ModelOpt FP8 KV cache (#3223)
This commit is contained in:
29
test/srt/test_modelopt_fp8kvcache.py
Normal file
29
test/srt/test_modelopt_fp8kvcache.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import unittest
|
||||
|
||||
from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
|
||||
|
||||
from sglang.srt.layers.quantization.modelopt_quant import (
|
||||
ModelOptFp8Config,
|
||||
ModelOptFp8KVCacheMethod,
|
||||
)
|
||||
|
||||
|
||||
class TestModelOptFp8KVCacheMethod(unittest.TestCase):
    """Unit tests for the NVIDIA ModelOpt FP8 KV-cache quantization method."""

    def test_kv_cache_method_initialization(self):
        """Test that ModelOptFp8KVCacheMethod can be instantiated and
        inherits from BaseKVCacheMethod."""
        # Build a config that marks the checkpoint as already FP8-serialized.
        config = ModelOptFp8Config(is_checkpoint_fp8_serialized=True)

        # Constructing the KV-cache method must succeed with that config.
        method = ModelOptFp8KVCacheMethod(config)

        # It must plug into vLLM's KV-cache quantization interface.
        self.assertIsInstance(method, BaseKVCacheMethod)

        # The config passed to the constructor must be retained on the instance.
        self.assertEqual(method.quant_config, config)
|
||||
|
||||
|
||||
# Allow running this test module directly with `python`.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user