[P/D]Make kv-transfer env variable take effect & Fix load-balance proxy (#3981)

### What this PR does / why we need it?
Make the kv-transfer environment variable take effect and fix the load-balance proxy.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By CI.


- vLLM version: v0.11.0
- vLLM main: 83f478bb19

---------

Signed-off-by: liziyu <liziyu16@huawei.com>
Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
zxr2333
2025-11-06 12:02:47 +08:00
committed by GitHub
parent 737cad2b6b
commit b206e831e9
7 changed files with 33 additions and 13 deletions

View File

@@ -978,9 +978,6 @@ class MockTensor:
self.data_ptr = MagicMock(return_value=0x1000)
mock_envs_ascend = MagicMock()
mock_envs_ascend.MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"
mock_logger = MagicMock()
@@ -1017,14 +1014,15 @@ def mock_string_to_int64_hash(s):
class TestMooncakeConnectorWorker(unittest.TestCase):
def setUp(self):
self.envs_ascend_mock = MockEnvsAscend()
self.mock_transfer_engine = MagicMock()
self.mock_transfer_engine.get_rpc_port.return_value = 9090
self.mock_transfer_engine.initialize.return_value = 0
self.mock_transfer_engine.register_memory.return_value = 0
self.patches = [
patch('os.getenv', return_value="10,11"),
patch(
'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
'10,11'),
patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
patch('torch.Tensor.element_size', return_value=4),
patch('torch.Tensor.data_ptr', return_value=0x1000),
@@ -1053,8 +1051,6 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
MagicMock()),
patch('vllm_ascend.distributed.mooncake_connector.threading.Event',
MagicMock()),
patch.dict('sys.modules',
{'vllm_ascend.envs': self.envs_ascend_mock}),
]
for p in self.patches:

View File

@@ -792,15 +792,15 @@ class TestMooncakeLayerwiseConnector(unittest.TestCase):
class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
def setUp(self):
self.envs_ascend_mock = type("MockEnvsAscend", (),
{"PHYSICAL_DEVICES": "10,11"})()
self.mock_transfer_engine = MagicMock()
self.mock_transfer_engine.get_rpc_port.return_value = 9090
self.mock_transfer_engine.initialize.return_value = 0
self.mock_transfer_engine.register_memory.return_value = 0
self.patches = [
patch('os.getenv', return_value="10,11"),
patch(
'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
'10,11'),
patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
patch('torch.Tensor.element_size', return_value=4),
patch('torch.Tensor.data_ptr', return_value=0x1000),
@@ -833,8 +833,6 @@ class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
patch(
'vllm_ascend.distributed.mooncake_layerwise_connector.threading.Event',
MagicMock()),
patch.dict('sys.modules',
{'vllm_ascend.envs': self.envs_ascend_mock}),
patch(
'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
return_value=SimpleNamespace(pd_tp_ratio=1,