[releases/v0.18.0][Doc][Misc] Modifying Configuration Parameters (#8618)
### What this PR does / why we need it?

This PR renames the environment variable `VLLM_NIXL_ABORT_REQUEST_TIMEOUT` to `VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT` to align with the Mooncake connector naming convention. It also updates the documentation and test configurations to reflect this change and adjusts the suggested timeout value in the documentation to 480 seconds for consistency.

### Does this PR introduce _any_ user-facing change?

Yes. The environment variable for configuring the abort request timeout has been renamed. Users should update their environment settings from `VLLM_NIXL_ABORT_REQUEST_TIMEOUT` to `VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT`.

### How was this patch tested?

The changes were verified by updating the corresponding test configuration files and ensuring consistency across the documentation.

---------

Signed-off-by: herizhen <1270637059@qq.com>
Signed-off-by: herizhen <59841270+herizhen@users.noreply.github.com>
This commit is contained in:
@@ -526,7 +526,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export ASCEND_RT_VISIBLE_DEVICES=$1
|
||||
|
||||
@@ -600,7 +601,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export ASCEND_RT_VISIBLE_DEVICES=$1
|
||||
|
||||
@@ -676,7 +678,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
@@ -752,7 +755,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
|
||||
@@ -530,6 +530,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
|
||||
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
|
||||
@@ -598,6 +600,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
|
||||
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
|
||||
|
||||
@@ -766,7 +766,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export ASCEND_RT_VISIBLE_DEVICES=$1
|
||||
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
|
||||
@@ -844,7 +845,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export ASCEND_RT_VISIBLE_DEVICES=$1
|
||||
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
|
||||
@@ -926,7 +928,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
@@ -1007,7 +1010,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
@@ -1088,7 +1092,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
@@ -1169,7 +1174,8 @@ Before you start, please
|
||||
export ASCEND_TRANSPORT_PRINT=1
|
||||
export ACL_OP_INIT_MODE=1
|
||||
export ASCEND_A3_ENABLE=1
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=300000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
|
||||
|
||||
@@ -288,7 +288,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
|
||||
# jemalloc is for better performance, if `libjemalloc.so` is installed on your machine, you can turn it on.
|
||||
# export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2:$LD_PRELOAD
|
||||
export VLLM_ENGINE_READY_TIMEOUT_S=30000
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
export IP_ADDRESS=$local_ip
|
||||
export NETWORK_CARD_NAME=$nic_name
|
||||
export HCCL_IF_IP=$IP_ADDRESS
|
||||
@@ -362,7 +363,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
|
||||
node0_ip="xxxx"
|
||||
|
||||
export VLLM_ENGINE_READY_TIMEOUT_S=30000
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
export MASTER_IP_ADDRESS=$node0_ip
|
||||
export IP_ADDRESS=$local_ip
|
||||
|
||||
@@ -442,7 +444,8 @@ To run the vllm-ascend `Prefill-Decode Disaggregation` service, you need to depl
|
||||
node0_ip="xxxx"
|
||||
|
||||
export VLLM_ENGINE_READY_TIMEOUT_S=30000
|
||||
export VLLM_NIXL_ABORT_REQUEST_TIMEOUT=30000
|
||||
# Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request.
|
||||
export VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT=480
|
||||
export MASTER_IP_ADDRESS=$node0_ip
|
||||
export IP_ADDRESS=$local_ip
|
||||
|
||||
|
||||
Reference in New Issue
Block a user