[Cherry-pick from PR #3981][0.11.0][P/D] Make kv-transfer env variable take effect & fix load-balance proxy (#3983)
### What this PR does / why we need it?

Makes the kv-transfer environment variable take effect and fixes the load-balance proxy. Cherry-picked from #3981.

---------

Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com>
This commit is contained in:
@@ -30,6 +30,7 @@ from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
|
||||
import vllm_ascend.envs as envs_ascend
|
||||
+ from vllm_ascend.distributed.utils import get_transfer_timeout_value
|
||||
from vllm_ascend.utils import AscendSocVersion, get_ascend_soc_version
|
||||
|
||||
TORCH_DTYPE_TO_NPU_DTYPE = {
|
||||
@@ -411,7 +412,7 @@ class LLMDataDistCMgrConnectorWorker():
|
||||
assert self.local_agent_metadata is not None
|
||||
llm_config = LLMConfig()
|
||||
llm_config.device_id = self.local_rank
|
||||
- llm_config.sync_kv_timeout = 20000
|
||||
+ llm_config.sync_kv_timeout = get_transfer_timeout_value()
|
||||
llm_config.enable_switch_role = True
|
||||
llm_config.enable_cache_manager = True
|
||||
llm_config.enable_remote_cache_accessible = True
|
||||
|
||||
Reference in New Issue
Block a user