[Refactor]Refactor of vllm_ascend/distributed module (#5719)

### What this PR does / why we need it?
Based on the RFC:https://github.com/vllm-project/vllm-ascend/issues/5604

This PR is a refactoring of vllm_ascend/distributed, moving all
kv_transfer realtaed codes into a dedicated folder, which has already
been done in vLLM

### Does this PR introduce _any_ user-facing change?
NA

### How was this patch tested?


- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef

---------

Signed-off-by: lty <linhebiwen@gmail.com>
This commit is contained in:
lty
2026-01-15 08:57:40 +08:00
committed by GitHub
parent f34b3b8ee9
commit 295018ec0f
56 changed files with 300 additions and 293 deletions

View File

@@ -326,7 +326,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -402,7 +401,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
"kv_role": "kv_producer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -480,7 +478,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "2",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -558,7 +555,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
"kv_role": "kv_consumer",
"kv_port": "30300",
"engine_id": "3",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,

View File

@@ -316,7 +316,6 @@ Before you start, please
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -391,7 +390,6 @@ Before you start, please
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -469,7 +467,6 @@ Before you start, please
"kv_role": "kv_consumer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -548,7 +545,6 @@ Before you start, please
"kv_role": "kv_consumer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {

View File

@@ -450,7 +450,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -516,7 +515,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
"kv_role": "kv_consumer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -583,7 +581,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
"kv_role": "kv_consumer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {

View File

@@ -123,7 +123,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -192,7 +191,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"use_ascend_direct": true,
"prefill": {
@@ -259,7 +257,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "3",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 1,

View File

@@ -280,7 +280,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -340,7 +339,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_producer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -401,7 +399,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "2",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -461,7 +458,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "2",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
"kv_connector_extra_config": {
"prefill": {
@@ -529,7 +525,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -589,7 +584,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_producer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -650,7 +644,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "2",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,
@@ -710,7 +703,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
"kv_role": "kv_consumer",
"kv_port": "30200",
"engine_id": "2",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 2,

View File

@@ -173,7 +173,6 @@ vllm serve /model/Qwen2.5-VL-7B-Instruct \
"kv_role": "kv_producer",
"kv_port": "30000",
"engine_id": "0",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 1,
@@ -216,7 +215,6 @@ vllm serve /model/Qwen2.5-VL-7B-Instruct \
"kv_role": "kv_consumer",
"kv_port": "30100",
"engine_id": "1",
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
"kv_connector_extra_config": {
"prefill": {
"dp_size": 1,