Fix of DeepSeek Error in KV Pool Mixed Deployment Scenario (#3087)

### What this PR does / why we need it?
A new kv_role, "kv_both", is added to support mixed deployment scenarios.
A mixed deployment includes a decode phase, during which with_prefill
should be false.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.10.2
- vLLM main: c60e6137f0

Signed-off-by: fems14 <1804143737@qq.com>
This commit is contained in:
fems14
2025-09-22 20:36:41 +08:00
committed by GitHub
parent 37a0715eda
commit 1c9f0fe26f
3 changed files with 12 additions and 6 deletions

View File

@@ -64,6 +64,7 @@ export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json"
export VLLM_USE_V1=1
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1
# ASCEND_TRANSPORT_PRINT (set above) is the switch for Mooncake memory swap logging, where 1 indicates enabled and 0 indicates disabled.
export ASCEND_AGGREGATE_ENABLE=1
# ASCEND_AGGREGATE_ENABLE (set above) is the switch for the Mooncake aggregation function, where 1 means on and 0 means off.
@@ -104,6 +105,7 @@ python3 -m vllm.entrypoints.openai.api_server \
{
"kv_connector": "MooncakeConnectorStoreV1",
"kv_role": "kv_producer",
"mooncake_rpc_port":"0"
}
]
}
@@ -124,6 +126,7 @@ export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm
export MOONCAKE_CONFIG_PATH="/xxxxx/mooncake.json"
export VLLM_USE_V1=1
export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
export ACL_OP_INIT_MODE=1
export ASCEND_TRANSPORT_PRINT=1
# ASCEND_TRANSPORT_PRINT (set above) is the switch for Mooncake memory swap logging, where 1 indicates enabled and 0 indicates disabled.
export ASCEND_AGGREGATE_ENABLE=1
@@ -165,6 +168,7 @@ python3 -m vllm.entrypoints.openai.api_server \
{
"kv_connector": "MooncakeConnectorStoreV1",
"kv_role": "kv_consumer",
"mooncake_rpc_port":"1"
}
]
}
@@ -223,6 +227,7 @@ export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm
export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json"
export VLLM_USE_V1=1
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
export ACL_OP_INIT_MODE=1
export ASCEND_TRANSPORT_PRINT=1
# ASCEND_TRANSPORT_PRINT (set above) is the switch for Mooncake memory swap logging, where 1 indicates enabled and 0 indicates disabled.
export ASCEND_AGGREGATE_ENABLE=1
@@ -242,9 +247,10 @@ python3 -m vllm.entrypoints.openai.api_server \
--kv-transfer-config \
'{
"kv_connector": "MooncakeConnectorStoreV1",
"kv_role": "kv_producer",
"kv_role": "kv_both",
"kv_connector_extra_config": {
"use_layerwise": false
"use_layerwise": false,
"mooncake_rpc_port":"0"
}
}' > mix.log 2>&1
```