[Refactor]Refactor of vllm_ascend/distributed module (#5719)
### What this PR does / why we need it?
Based on the RFC: https://github.com/vllm-project/vllm-ascend/issues/5604
This PR refactors vllm_ascend/distributed, moving all
kv_transfer-related code into a dedicated folder, as has already
been done in vLLM.
### Does this PR introduce _any_ user-facing change?
NA
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: lty <linhebiwen@gmail.com>
This commit is contained in:
@@ -326,7 +326,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -402,7 +401,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -480,7 +478,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "2",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -558,7 +555,6 @@ vllm serve /weights/DeepSeek-V3.1-w8a8-mtp-QuaRot \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30300",
|
||||
"engine_id": "3",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
|
||||
@@ -316,7 +316,6 @@ Before you start, please
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -391,7 +390,6 @@ Before you start, please
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -469,7 +467,6 @@ Before you start, please
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -548,7 +545,6 @@ Before you start, please
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
|
||||
@@ -450,7 +450,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -516,7 +515,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -583,7 +581,6 @@ vllm serve vllm-ascend/Qwen3-235B-A22B-w8a8 \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
|
||||
@@ -123,7 +123,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -192,7 +191,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"use_ascend_direct": true,
|
||||
"prefill": {
|
||||
@@ -259,7 +257,6 @@ vllm serve /path_to_weight/DeepSeek-V3.1_w8a8mix_mtp \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "3",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 1,
|
||||
|
||||
@@ -280,7 +280,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -340,7 +339,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -401,7 +399,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "2",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -461,7 +458,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "2",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_layerwise_connector",
|
||||
"kv_connector_extra_config": {
|
||||
|
||||
"prefill": {
|
||||
@@ -529,7 +525,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -589,7 +584,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -650,7 +644,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "2",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -710,7 +703,6 @@ vllm serve /path_to_weight/DeepSeek-r1_w8a8_mtp \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30200",
|
||||
"engine_id": "2",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
|
||||
@@ -173,7 +173,6 @@ vllm serve /model/Qwen2.5-VL-7B-Instruct \
|
||||
"kv_role": "kv_producer",
|
||||
"kv_port": "30000",
|
||||
"engine_id": "0",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 1,
|
||||
@@ -216,7 +215,6 @@ vllm serve /model/Qwen2.5-VL-7B-Instruct \
|
||||
"kv_role": "kv_consumer",
|
||||
"kv_port": "30100",
|
||||
"engine_id": "1",
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 1,
|
||||
|
||||
Reference in New Issue
Block a user