[Feature] Support KV NZ feature for DeepSeek decode node in disagg-prefill scenario (#3072)
By converting the KV cache from ND to NZ format when the decode node
receives it, this PR ensures that the KV NZ feature works correctly
during the decoding phase in the disagg-prefill scenario.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
---------
Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
Co-authored-by: ghphotoframe <854746559@qq.com>
Co-authored-by: alex101-ops <alex1015718386@gmail.com>
This commit is contained in:
@@ -39,6 +39,7 @@ class TestAscendConfig(TestBase):
|
||||
ascend_config = init_ascend_config(test_vllm_config)
|
||||
self.assertIsNone(ascend_config.expert_map_path)
|
||||
self.assertFalse(ascend_config.multistream_overlap_shared_expert)
|
||||
self.assertFalse(ascend_config.enable_kv_nz)
|
||||
|
||||
ascend_compilation_config = ascend_config.ascend_compilation_config
|
||||
self.assertTrue(ascend_compilation_config.fuse_norm_quant)
|
||||
@@ -53,6 +54,7 @@ class TestAscendConfig(TestBase):
|
||||
"multistream_overlap_shared_expert": True,
|
||||
"expert_map_path": "test_expert_map_path",
|
||||
"refresh": True,
|
||||
"enable_kv_nz": False
|
||||
}
|
||||
ascend_config = init_ascend_config(test_vllm_config)
|
||||
self.assertEqual(ascend_config.expert_map_path, "test_expert_map_path")
|
||||
@@ -61,6 +63,7 @@ class TestAscendConfig(TestBase):
|
||||
|
||||
ascend_compilation_config = ascend_config.ascend_compilation_config
|
||||
self.assertFalse(ascend_compilation_config.fuse_norm_quant)
|
||||
self.assertFalse(ascend_config.enable_kv_nz)
|
||||
|
||||
@_clean_up_ascend_config
|
||||
def test_init_ascend_config_enable_npugraph_ex(self):
|
||||
|
||||
Reference in New Issue
Block a user