diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py index f7206365..e5de1133 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py @@ -766,7 +766,6 @@ class MooncakeLayerwiseConnectorScheduler: def update_state_after_alloc(self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int): params = request.kv_transfer_params - do_virtual = params.get("do_virtual") logger.debug( "MooncakeLayerwiseConnector update_state_after_alloc: num_external_tokens=%s, kv_transfer_params=%s", num_external_tokens, @@ -774,6 +773,7 @@ class MooncakeLayerwiseConnectorScheduler: ) if params is not None and params.get("do_remote_prefill"): + do_virtual = params.get("do_virtual", False) local_block_ids = (blocks.get_block_ids()) if num_external_tokens > 0 else [] remote_cached_tokens = request.num_computed_tokens # Get unhashed blocks to pull from remote. diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py index 2796bfa0..09cec3c5 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py @@ -336,7 +336,7 @@ class KVPoolScheduler: return False, None delay_free_blocks = len(block_ids) > 0 if delay_free_blocks: - logger.info("Delaying free of %d blocks for request %s", len(block_ids), request.request_id) + logger.debug("Delaying free of %d blocks for request %s", len(block_ids), request.request_id) return delay_free_blocks, None