[P/D]Make kv-transfer env variable take effect & Fix load-balance proxy (#3981)
### What this PR does / why we need it?
Make the kv-transfer environment variable take effect and fix the load-balance proxy.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
By CI.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
---------
Signed-off-by: liziyu <liziyu16@huawei.com>
Signed-off-by: nwpu-zxr <zhouxuerong2@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
@@ -978,9 +978,6 @@ class MockTensor:
|
||||
self.data_ptr = MagicMock(return_value=0x1000)
|
||||
|
||||
|
||||
mock_envs_ascend = MagicMock()
|
||||
mock_envs_ascend.MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"
|
||||
|
||||
mock_logger = MagicMock()
|
||||
|
||||
|
||||
@@ -1017,14 +1014,15 @@ def mock_string_to_int64_hash(s):
|
||||
class TestMooncakeConnectorWorker(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.envs_ascend_mock = MockEnvsAscend()
|
||||
self.mock_transfer_engine = MagicMock()
|
||||
self.mock_transfer_engine.get_rpc_port.return_value = 9090
|
||||
self.mock_transfer_engine.initialize.return_value = 0
|
||||
self.mock_transfer_engine.register_memory.return_value = 0
|
||||
|
||||
self.patches = [
|
||||
patch('os.getenv', return_value="10,11"),
|
||||
patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
|
||||
'10,11'),
|
||||
patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
|
||||
patch('torch.Tensor.element_size', return_value=4),
|
||||
patch('torch.Tensor.data_ptr', return_value=0x1000),
|
||||
@@ -1053,8 +1051,6 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
|
||||
MagicMock()),
|
||||
patch('vllm_ascend.distributed.mooncake_connector.threading.Event',
|
||||
MagicMock()),
|
||||
patch.dict('sys.modules',
|
||||
{'vllm_ascend.envs': self.envs_ascend_mock}),
|
||||
]
|
||||
|
||||
for p in self.patches:
|
||||
|
||||
@@ -792,15 +792,15 @@ class TestMooncakeLayerwiseConnector(unittest.TestCase):
|
||||
class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.envs_ascend_mock = type("MockEnvsAscend", (),
|
||||
{"PHYSICAL_DEVICES": "10,11"})()
|
||||
self.mock_transfer_engine = MagicMock()
|
||||
self.mock_transfer_engine.get_rpc_port.return_value = 9090
|
||||
self.mock_transfer_engine.initialize.return_value = 0
|
||||
self.mock_transfer_engine.register_memory.return_value = 0
|
||||
|
||||
self.patches = [
|
||||
patch('os.getenv', return_value="10,11"),
|
||||
patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.envs_ascend.PHYSICAL_DEVICES',
|
||||
'10,11'),
|
||||
patch('torch.Tensor.size', return_value=(10, 16, 8, 16)),
|
||||
patch('torch.Tensor.element_size', return_value=4),
|
||||
patch('torch.Tensor.data_ptr', return_value=0x1000),
|
||||
@@ -833,8 +833,6 @@ class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
|
||||
patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.threading.Event',
|
||||
MagicMock()),
|
||||
patch.dict('sys.modules',
|
||||
{'vllm_ascend.envs': self.envs_ascend_mock}),
|
||||
patch(
|
||||
'vllm_ascend.distributed.mooncake_layerwise_connector.get_ascend_config',
|
||||
return_value=SimpleNamespace(pd_tp_ratio=1,
|
||||
|
||||
Reference in New Issue
Block a user