diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/worker/cache_engine.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/worker/cache_engine.py
index 3be0fc2..a4ddc36 100644
--- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/worker/cache_engine.py
+++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/worker/cache_engine.py
@@ -28,19 +28,19 @@ def vllm__worker__cache_engine__CacheEngine___allocate_kv_cache(
     cap num_blocks to avoid exceeding CNNL int32 element limit
     '''
     # CNNL operators have a max supported tensor element count of INT32_MAX.
-    # If the kv_cache tensor would exceed this limit, reduce num_blocks.
+    # num_blocks should already be capped by determine_num_available_blocks,
+    # this is a defensive check to catch any edge cases.
     CNNL_MAX_TENSOR_ELEMENTS = 2**31 - 1
     total_elements = 1
     for dim in kv_cache_shape:
         total_elements *= dim
     if total_elements > CNNL_MAX_TENSOR_ELEMENTS:
-        # Calculate the max num_blocks that fits within the limit.
-        # kv_cache_shape = (2, num_blocks, num_kv_heads, block_size, head_size)
         elements_per_block = total_elements // num_blocks
         max_num_blocks = CNNL_MAX_TENSOR_ELEMENTS // elements_per_block
         logger.warning(
             "KV cache tensor elements (%d) exceed CNNL max (%d). "
-            "Reducing num_blocks from %d to %d.",
+            "Reducing num_blocks from %d to %d. This indicates "
+            "determine_num_available_blocks did not cap correctly.",
             total_elements, CNNL_MAX_TENSOR_ELEMENTS, num_blocks, max_num_blocks)
         num_blocks = max_num_blocks
 