diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py index a7e570fa..a6df8c39 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py @@ -187,7 +187,7 @@ class KVCacheStoreSendingThread(KVTransferThread): ends = ends[skip_block_num:] keys = keys[skip_block_num:] - logger.info( + logger.debug( "Storing KV cache for %d out of %d blocks (skip_block_num=%d) for request %s", len(keys), token_len // self.block_size, diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py index 2629b695..2796bfa0 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py @@ -91,7 +91,7 @@ class KVPoolScheduler: else: need_to_allocate = num_external_hit_tokens - num_computed_tokens - logger.info( + logger.debug( "Reqid: %s, Total tokens %d, kvpool hit tokens: %d, need to load: %d", request.request_id, request.num_tokens,