[Cherry-pick] [0.11.0] pd proxy support ipv6 and fix proxy (#4242)
### What this PR does / why we need it? pd proxy support ipv6, mooncake connector check whether the IPv6 address is used and notify the user. --------- Signed-off-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import ipaddress
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
@@ -8,6 +9,15 @@ _global_te_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_global_te(hostname: str, device_name: Optional[str]):
|
||||
try:
|
||||
ip = ipaddress.ip_address(hostname)
|
||||
if isinstance(ip, ipaddress.IPv6Address):
|
||||
raise RuntimeError(
|
||||
"The backend of mooncake's Ascend Direct Xfer Library currently does not support IPv6."
|
||||
)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
global _global_te
|
||||
if _global_te is None:
|
||||
with _global_te_lock:
|
||||
|
||||
@@ -162,11 +162,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
|
||||
# Whether to enable msMonitor tool to monitor the performance of vllm-ascend.
|
||||
"MSMONITOR_USE_DAEMON":
|
||||
lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
|
||||
# Timeout (in seconds) for delayed KVCache block release. In the prefill
|
||||
# node, if a request is marked for delayed KV block release and the blocks
|
||||
# are not freed within this timeout, they will be forcibly released.
|
||||
"VLLM_ASCEND_KVCACHE_DELAY_FREE_TIMEOUT":
|
||||
lambda: int(os.getenv("VLLM_ASCEND_KVCACHE_DELAY_FREE_TIMEOUT", 250)),
|
||||
"VLLM_ASCEND_ENABLE_MLAPO":
|
||||
lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_MLAPO", '0'))),
|
||||
# Whether to enable transpose weight and cast format to FRACTAL_NZ.
|
||||
|
||||
Reference in New Issue
Block a user