lower log level in PD Disaggregation (#7589)
### What this PR does / why we need it?
This log is printed too frequently and is unnecessary at INFO level, so
this PR lowers its level from INFO to DEBUG.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
- vLLM version: v0.18.0
- vLLM main:
ed359c497a
---------
Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
This commit is contained in:
@@ -766,7 +766,6 @@ class MooncakeLayerwiseConnectorScheduler:
|
||||
|
||||
def update_state_after_alloc(self, request: "Request", blocks: "KVCacheBlocks", num_external_tokens: int):
|
||||
params = request.kv_transfer_params
|
||||
do_virtual = params.get("do_virtual")
|
||||
logger.debug(
|
||||
"MooncakeLayerwiseConnector update_state_after_alloc: num_external_tokens=%s, kv_transfer_params=%s",
|
||||
num_external_tokens,
|
||||
@@ -774,6 +773,7 @@ class MooncakeLayerwiseConnectorScheduler:
|
||||
)
|
||||
|
||||
if params is not None and params.get("do_remote_prefill"):
|
||||
do_virtual = params.get("do_virtual", False)
|
||||
local_block_ids = (blocks.get_block_ids()) if num_external_tokens > 0 else []
|
||||
remote_cached_tokens = request.num_computed_tokens
|
||||
# Get unhashed blocks to pull from remote.
|
||||
|
||||
@@ -336,7 +336,7 @@ class KVPoolScheduler:
|
||||
return False, None
|
||||
delay_free_blocks = len(block_ids) > 0
|
||||
if delay_free_blocks:
|
||||
logger.info("Delaying free of %d blocks for request %s", len(block_ids), request.request_id)
|
||||
logger.debug("Delaying free of %d blocks for request %s", len(block_ids), request.request_id)
|
||||
return delay_free_blocks, None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user