[releases/v0.18.0][Doc][Misc] Modifying Configuration Parameters (#8618)

### What this PR does / why we need it?
This PR renames the environment variable VLLM_NIXL_ABORT_REQUEST_TIMEOUT
to VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT to align with the Mooncake
connector naming convention. It also updates the documentation and test
configurations to use the new name, and changes the timeout value they
set (previously 300000 or 30000) to 480 seconds for consistency.

### Does this PR introduce _any_ user-facing change?
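Yes. The environment variable for configuring the abort request timeout
has been renamed. Users should update their environment settings from
VLLM_NIXL_ABORT_REQUEST_TIMEOUT to VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT.
The delayed-free expiry check in KVCacheTaskTracker now reads the new
name, so a value set only under the old name will presumably no longer
take effect there.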

### How was this patch tested?
The changes were verified by updating the corresponding test
configuration files and ensuring consistency across the documentation.

---------

Signed-off-by: herizhen <1270637059@qq.com>
Signed-off-by: herizhen <59841270+herizhen@users.noreply.github.com>
This commit is contained in:
herizhen
2026-04-23 16:23:31 +08:00
committed by GitHub
parent ce92be29d2
commit ff76c6780e
20 changed files with 95 additions and 74 deletions

View File

@@ -434,6 +434,10 @@ msgstr ""
msgid "Consistency" msgid "Consistency"
msgstr "一致性" msgstr "一致性"
#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:236
msgid "Expert Map"
msgstr "专家映射"
#: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237 #: ../../source/developer_guide/Design_Documents/eplb_swift_balancer.md:237
msgid "" msgid ""
"The expert map must be globally unique during initialization and update. " "The expert map must be globally unique during initialization and update. "

View File

@@ -526,7 +526,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1 export ASCEND_RT_VISIBLE_DEVICES=$1
@@ -600,7 +601,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1 export ASCEND_RT_VISIBLE_DEVICES=$1
@@ -676,7 +678,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
@@ -752,7 +755,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1

View File

@@ -530,6 +530,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
# Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
@@ -598,6 +600,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
# Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1 export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1

View File

@@ -766,7 +766,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1 export ASCEND_RT_VISIBLE_DEVICES=$1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1 export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
@@ -844,7 +845,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export ASCEND_RT_VISIBLE_DEVICES=$1 export ASCEND_RT_VISIBLE_DEVICES=$1
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
@@ -926,7 +928,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
@@ -1007,7 +1010,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
@@ -1088,7 +1092,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1
@@ -1169,7 +1174,8 @@ Before you start, please
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
export ASCEND_A3_ENABLE=1 export ASCEND_A3_ENABLE=1
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export TASK_QUEUE_ENABLE=1 export TASK_QUEUE_ENABLE=1

View File

@@ -288,7 +288,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
# jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on. # jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on.
# export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD # export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD
export VLLM_ENGINE_READY_TIMEOUT_S=30000 export VLLM_ENGINE_READY_TIMEOUT_S=30000
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export IP_ADDRESS=$local_ip export IP_ADDRESS=$local_ip
export NETWORK_CARD_NAME=$nic_name export NETWORK_CARD_NAME=$nic_name
export HCCL_IF_IP=$IP_ADDRESS export HCCL_IF_IP=$IP_ADDRESS
@@ -362,7 +363,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
node0_ip="xxxx" node0_ip="xxxx"
export VLLM_ENGINE_READY_TIMEOUT_S=30000 export VLLM_ENGINE_READY_TIMEOUT_S=30000
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export MASTER_IP_ADDRESS=$node0_ip export MASTER_IP_ADDRESS=$node0_ip
export IP_ADDRESS=$local_ip export IP_ADDRESS=$local_ip
@@ -442,7 +444,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
node0_ip="xxxx" node0_ip="xxxx"
export VLLM_ENGINE_READY_TIMEOUT_S=30000 export VLLM_ENGINE_READY_TIMEOUT_S=30000
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000 # Timeout (in seconds) for automatically releasing the prefillers KV cache for a particular request.
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
export MASTER_IP_ADDRESS=$node0_ip export MASTER_IP_ADDRESS=$node0_ip
export IP_ADDRESS=$local_ip export IP_ADDRESS=$local_ip

View File

@@ -13,7 +13,7 @@ env_common:
HCCL_DETERMINISTIC: True HCCL_DETERMINISTIC: True
TASK_QUEUE_ENABLE: 1 TASK_QUEUE_ENABLE: 1
HCCL_OP_RETRY_ENABLE: "L0:0, L1:0" HCCL_OP_RETRY_ENABLE: "L0:0, L1:0"
VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
disaggregated_prefill: disaggregated_prefill:
enabled: true enabled: true

View File

@@ -15,7 +15,7 @@ env_common:
ASCEND_TRANSPORT_PRINT: 1 ASCEND_TRANSPORT_PRINT: 1
ACL_OP_INIT_MODE: 1 ACL_OP_INIT_MODE: 1
ASCEND_A3_ENABLE: 1 ASCEND_A3_ENABLE: 1
VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
VLLM_ENGINE_READY_TIMEOUT_S: 1800 VLLM_ENGINE_READY_TIMEOUT_S: 1800
HCCL_CONNECT_TIMEOUT: 1200 HCCL_CONNECT_TIMEOUT: 1200
HCCL_INTRA_PCIE_ENABLE: 1 HCCL_INTRA_PCIE_ENABLE: 1

View File

@@ -15,7 +15,7 @@ env_common:
ASCEND_TRANSPORT_PRINT: 1 ASCEND_TRANSPORT_PRINT: 1
ACL_OP_INIT_MODE: 1 ACL_OP_INIT_MODE: 1
ASCEND_A3_ENABLE: 1 ASCEND_A3_ENABLE: 1
VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000 VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480
VLLM_ENGINE_READY_TIMEOUT_S: 1800 VLLM_ENGINE_READY_TIMEOUT_S: 1800
HCCL_CONNECT_TIMEOUT: 1200 HCCL_CONNECT_TIMEOUT: 1200
HCCL_INTRA_PCIE_ENABLE: 1 HCCL_INTRA_PCIE_ENABLE: 1

View File

@@ -173,7 +173,7 @@ class KVCacheTaskTracker:
while self.delayed_free_requests: while self.delayed_free_requests:
request_id = next(iter(self.delayed_free_requests)) request_id = next(iter(self.delayed_free_requests))
delay_start_time = self.delayed_free_requests[request_id] delay_start_time = self.delayed_free_requests[request_id]
if current_time - delay_start_time > envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT: if current_time - delay_start_time > envs.VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT:
self.delayed_free_requests.popitem(last=False) self.delayed_free_requests.popitem(last=False)
self.reqs_to_process.discard(request_id) self.reqs_to_process.discard(request_id)
expired_requests.add(request_id) expired_requests.add(request_id)