bugfix for mooncake (#3535)
### What this PR does / why we need it?

Bug fix for the Mooncake connector: remove an unnecessary condition check (the `or self.use_sparse` clause) from the branch that sets `num_need_pulls`.

### How was this patch tested?

Tested by CI.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
This commit is contained in:
@@ -943,7 +943,7 @@ class MooncakeConnectorWorker:
|
|||||||
# kv_transfer variables
|
# kv_transfer variables
|
||||||
self.vllm_config = vllm_config
|
self.vllm_config = vllm_config
|
||||||
self.block_size = vllm_config.cache_config.block_size
|
self.block_size = vllm_config.cache_config.block_size
|
||||||
if self.vllm_config.model_config.is_deepseek_mla or self.use_sparse:
|
if self.vllm_config.model_config.is_deepseek_mla:
|
||||||
self.num_need_pulls = 1
|
self.num_need_pulls = 1
|
||||||
else:
|
else:
|
||||||
num_d_block_heads = max(1,
|
num_d_block_heads = max(1,
|
||||||
|
|||||||
Reference in New Issue
Block a user