[Refactor] Refactor p2p connector (#6551)
### What this PR does / why we need it?
This PR refactors the p2p connector: redundant code is removed and
duplicated logic is consolidated, making the code easier to extend.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Prefill (P) node:
```
vllm serve /mnt/weight/DeepSeek-V3.2-Exp-W8A8 \
--host 0.0.0.0 \
--port 8002 \
--data-parallel-size 2 \
--tensor-parallel-size 8 \
--enable-expert-parallel \
--seed 1024 \
--served-model-name model \
--max-model-len 8192 \
--max-num-batched-tokens 8192 \
--max-num-seqs 16 \
--enforce-eager \
--trust-remote-code \
--gpu-memory-utilization 0.92 \
--quantization ascend \
--async-scheduling \
--additional-config '{"ascend_scheduler_config":{"enabled":true}}' \
--kv-transfer-config \
'{
"kv_connector": "MultiConnector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"use_layerwise": false,
"connectors": [
{
"kv_connector": "MooncakeConnectorV1",
"kv_role": "kv_producer",
"kv_port": "30000",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
"dp_size": 2,
"tp_size": 8
},
"decode": {
"dp_size": 4,
"tp_size": 4
}
}
},
{
"kv_connector": "AscendStoreConnector",
"kv_role": "kv_producer",
"kv_connector_extra_config": {
"backend": "mooncake",
"mooncake_rpc_port":"0"
}
}
]
}
}'
```
Decode (D) node:
```
vllm serve /mnt/share/DeepSeek-V3.2-Exp-W8A8 \
--host 0.0.0.0 \
--port 8003 \
--data-parallel-size 4 \
--tensor-parallel-size 4 \
--enable-expert-parallel \
--seed 1024 \
--served-model-name model \
--max-model-len 8192 \
--max-num-batched-tokens 8192 \
--max-num-seqs 16 \
--enforce-eager \
--trust-remote-code \
--gpu-memory-utilization 0.92 \
--quantization ascend \
--async-scheduling \
--additional-config '{"ascend_scheduler_config":{"enabled":true}}' \
--kv-transfer-config \
'{
"kv_connector": "MultiConnector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"use_layerwise": false,
"connectors": [
{
"kv_connector": "MooncakeConnectorV1",
"kv_role": "kv_consumer",
"kv_port": "30100",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
"dp_size": 2,
"tp_size": 8
},
"decode": {
"dp_size": 4,
"tp_size": 4
}
}
},{
"kv_connector": "AscendStoreConnector",
"kv_role": "kv_consumer",
"kv_connector_extra_config": {
"backend": "mooncake",
"mooncake_rpc_port":"1"
}
}
]
}
}'
```
- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.15.0
---------
Signed-off-by: lty <linhebiwen@gmail.com>