[P/D]Make kv-transfer env variable take effect & Fix load-balance proxy (#3981)

### What this PR does / why we need it?
Make the kv-transfer env variable take effect and fix the load-balance proxy.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By CI.

- vLLM version: v0.11.0
- vLLM main: 83f478bb19

---------

Signed-off-by: liziyu <liziyu16@huawei.com>
Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
2025-11-06 12:02:47 +08:00
parent 737cad2b6b
commit b206e831e9
7 changed files with 33 additions and 13 deletions
--- a/vllm_ascend/distributed/utils.py
+++ b/vllm_ascend/distributed/utils.py
@@ -1,3 +1,5 @@
+import os
+
 import torch
 import torch.distributed as dist

@@ -45,3 +47,15 @@ def align_memory(tensor: torch.Tensor, alignment: int) -> torch.Tensor:
    aligned_addr = (data_ptr + alignment - 1) // alignment * alignment
    offset = (aligned_addr - data_ptr) // tensor.element_size()
    return tensor[int(offset):]
+
+
+def get_transfer_timeout_value():  # returns an int timeout; unit is not shown here (presumably milliseconds, TODO confirm with callers)
+    ascend_transfer_timeout = os.getenv("ASCEND_TRANSFER_TIMEOUT", "")  # "" when the variable is unset
+    if len(ascend_transfer_timeout) > 0:  # a non-empty ASCEND_TRANSFER_TIMEOUT overrides the derived value
+        return int(ascend_transfer_timeout)  # int() raises ValueError if the value is not an integer string
+    hccl_rdma_timeout = int(os.getenv('HCCL_RDMA_TIMEOUT',  # used as the power-of-two exponent below
+                                      '20'))  # type: ignore
+    hccl_rdma_retry_cnt = int(os.getenv('HCCL_RDMA_RETRY_CNT',  # used as the multiplier below
+                                        '7'))  # type: ignore
+    return int((4.096 * (2**hccl_rdma_timeout)) * hccl_rdma_retry_cnt // 1000 +  # floor(4.096 * 2**timeout * retry_cnt / 1000) ...
+               3000)  # ... plus a fixed 3000; the defaults (20, 7) yield 33064. NOTE(review): 4.096 looks like a microsecond base unit, verify against HCCL docs