lower log level in PD Disaggregation (#7589)

### What this PR does / why we need it?
This log is printed too frequently and is unnecessary, so this PR lowers its
level from INFO to DEBUG.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?

- vLLM version: v0.18.0
- vLLM main:
ed359c497a

---------

Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
This commit is contained in:
zouyida2052
2026-03-24 18:03:17 +08:00
committed by GitHub
parent 0e3186f07c
commit 0210cc0b07
2 changed files with 2 additions and 2 deletions

View File

@@ -766,7 +766,6 @@ class MooncakeLayerwiseConnectorScheduler:
def update_state_after_alloc(self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int):
params = request.kv_transfer_params
do_virtual = params.get("do_virtual")
logger.debug(
"MooncakeLayerwiseConnector update_state_after_alloc: num_external_tokens=%s, kv_transfer_params=%s",
num_external_tokens,
@@ -774,6 +773,7 @@ class MooncakeLayerwiseConnectorScheduler:
)
if params is not None and params.get("do_remote_prefill"):
do_virtual = params.get("do_virtual", False)
local_block_ids = (blocks.get_block_ids()) if num_external_tokens > 0 else []
remote_cached_tokens = request.num_computed_tokens
# Get unhashed blocks to pull from remote.

View File

@@ -336,7 +336,7 @@ class KVPoolScheduler:
return False, None
delay_free_blocks = len(block_ids) > 0
if delay_free_blocks:
logger.info("Delaying free of %d blocks for request %s", len(block_ids), request.request_id)
logger.debug("Delaying free of %d blocks for request %s", len(block_ids), request.request_id)
return delay_free_blocks, None