diff --git a/vllm_ascend/distributed/kvpool/pool_scheduler.py b/vllm_ascend/distributed/kvpool/pool_scheduler.py index d1564ce7..e4274bec 100644 --- a/vllm_ascend/distributed/kvpool/pool_scheduler.py +++ b/vllm_ascend/distributed/kvpool/pool_scheduler.py @@ -29,8 +29,10 @@ class KVPoolScheduler: "load_async", False) # request_id -> (vllm cached tokes, kvpool cached tokens) self.load_specs: dict[str, LoadSpec] = {} - self.pcp_size = vllm_config.parallel_config.prefill_context_parallel_size - self.dcp_size = vllm_config.parallel_config.decode_context_parallel_size + self.pcp_size = getattr(vllm_config.parallel_config, + "prefill_context_parallel_size", 1) + self.dcp_size = getattr(vllm_config.parallel_config, + "decode_context_parallel_size", 1) self._block_size = vllm_config.cache_config.block_size if self.pcp_size > 1: