[Refactor]Refactor of vllm_ascend/distributed module (#5719)
### What this PR does / why we need it?
Based on the RFC: https://github.com/vllm-project/vllm-ascend/issues/5604
This PR refactors vllm_ascend/distributed, moving all kv_transfer-related
code into a dedicated folder, as has already been done in upstream
vLLM.
### Does this PR introduce _any_ user-facing change?
NA
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
---------
Signed-off-by: lty <linhebiwen@gmail.com>
This commit is contained in:
@@ -55,7 +55,6 @@ vllm serve "/xxxxx/DeepSeek-V2-Lite-Chat" \
|
||||
"kv_port": "20001",
|
||||
"engine_id": "0",
|
||||
"kv_rank": 0,
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
@@ -123,7 +122,6 @@ vllm serve "/xxxxx/DeepSeek-V2-Lite-Chat" \
|
||||
"kv_port": "20002",
|
||||
"engine_id": "1",
|
||||
"kv_rank": 1,
|
||||
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
|
||||
"kv_connector_extra_config": {
|
||||
"prefill": {
|
||||
"dp_size": 2,
|
||||
|
||||
@@ -56,7 +56,6 @@ def run_prefill(prefill_done, process_close):
|
||||
kv_role="kv_producer",
|
||||
kv_port="30000",
|
||||
engine_id="0",
|
||||
kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
|
||||
kv_connector_extra_config={"prefill": {"dp_size": 1, "tp_size": 1}, "decode": {"dp_size": 1, "tp_size": 1}},
|
||||
)
|
||||
# Set NPU memory utilization to 0.8
|
||||
@@ -104,7 +103,6 @@ def run_decode(prefill_done):
|
||||
kv_role="kv_consumer",
|
||||
kv_port="30100",
|
||||
engine_id="1",
|
||||
kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
|
||||
kv_connector_extra_config={"prefill": {"dp_size": 1, "tp_size": 1}, "decode": {"dp_size": 1, "tp_size": 1}},
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user