diff --git a/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py b/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py index 691a0a3..fe6617a 100644 --- a/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +++ b/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py @@ -181,7 +181,7 @@ class LLMDataDistCMgrConnectorScheduler(): dp_rank_local = self.vllm_config.parallel_config.data_parallel_rank_local tp_size = self.vllm_config.parallel_config.tensor_parallel_size - self.port = dp_rank_local * tp_size + envs_ascend.VLLM_LLMDD_RPC_PORT if dp_rank_local is not None else tp_size + envs_ascend.VLLM_LLMDD_RPC_PORT + self.port = dp_rank_local * tp_size + envs_ascend.VLLM_ASCEND_LLMDD_RPC_PORT if dp_rank_local is not None else tp_size + envs_ascend.VLLM_ASCEND_LLMDD_RPC_PORT self._reqs_need_recv: dict[str, tuple[Request, list[int]]] = {} @@ -344,8 +344,8 @@ class LLMDataDistCMgrConnectorWorker(): def listen_for_agent_metadata_req(self, event: threading.Event): assert self.local_agent_metadata is not None - port = envs_ascend.VLLM_LLMDD_RPC_PORT + self.local_dp_rank * self.tp_size + self.tp_rank if self.local_dp_rank is not None else envs_ascend.VLLM_LLMDD_RPC_PORT + self.tp_size + self.tp_rank - url = f"tcp://0.0.0.0:{port}" + port = envs_ascend.VLLM_ASCEND_LLMDD_RPC_PORT + self.local_dp_rank * self.tp_size + self.tp_rank if self.local_dp_rank is not None else envs_ascend.VLLM_ASCEND_LLMDD_RPC_PORT + self.tp_size + self.tp_rank + url = f"tcp://{envs_ascend.VLLM_ASCEND_LLMDD_RPC_IP}:{port}" msg_encoder = msgspec.msgpack.Encoder() msg_decoder = msgspec.msgpack.Decoder() msg_to_send = msg_encoder.encode(self.local_agent_metadata) diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 7120695..16148bb 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -123,11 +123,11 @@ env_variables: Dict[str, Callable[[], Any]] = { # remote worker. "VLLM_ASCEND_LLMDD_RPC_IP": lambda: os.getenv("VLLM_ASCEND_LLMDD_RPC_IP", "0.0.0.0"), - # `LLMDataDistCMgrConnector` required variable. `VLLM_LLMDD_RPC_PORT` is used as the + # `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_PORT` is used as the # rpc communication listening port, which will be used to receive the agent metadata from the # remote worker. - "VLLM_LLMDD_RPC_PORT": - lambda: int(os.getenv("VLLM_LLMDD_RPC_PORT", 5557)), + "VLLM_ASCEND_LLMDD_RPC_PORT": + lambda: int(os.getenv("VLLM_ASCEND_LLMDD_RPC_PORT", 5557)), # Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible # and the mla_pa will be the default path of deepseek decode path. "VLLM_ASCEND_MLA_PA":