Fix of DeepSeek Error in KV Pool Mixed Deployment Scenario (#3087)

### What this PR does / why we need it?

A new kv_role, "kv_both", is added to support mixed deployment scenarios. A mixed deployment includes a decode phase, during which with_prefill should be false.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.10.2
- vLLM main: c60e6137f0

Signed-off-by: fems14 <1804143737@qq.com>
2025-09-22 20:36:41 +08:00
parent 37a0715eda
commit 1c9f0fe26f
3 changed files with 12 additions and 6 deletions
--- a/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md
+++ b/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md
@@ -64,6 +64,7 @@ export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json"
 export VLLM_USE_V1=1
 export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
 export ASCEND_TRANSPORT_PRINT=1
+export ACL_OP_INIT_MODE=1
 # ASCEND_TRANSPORT_PRINT (set above) is the switch for Mooncake memory-swap logging, where 1 indicates enabled and 0 indicates disabled.
 export ASCEND_AGGREGATE_ENABLE=1
 # The environment variable above (ASCEND_AGGREGATE_ENABLE) is the switch for enabling the Mooncake aggregation function, where 1 means on and 0 means off.
@@ -104,6 +105,7 @@ python3 -m vllm.entrypoints.openai.api_server \
            		{
 				"kv_connector": "MooncakeConnectorStoreV1",
 				"kv_role": "kv_producer",
+                "mooncake_rpc_port":"0"
 			}  
 		]
 	}
@@ -124,6 +126,7 @@ export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm
 export MOONCAKE_CONFIG_PATH="/xxxxx/mooncake.json"
 export VLLM_USE_V1=1
 export ASCEND_RT_VISIBLE_DEVICES=4,5,6,7
+export ACL_OP_INIT_MODE=1
 export ASCEND_TRANSPORT_PRINT=1
 # The environment variable above (ASCEND_TRANSPORT_PRINT) is the switch for Mooncake memory-swap logging, where 1 indicates enabled and 0 indicates disabled.
 export ASCEND_AGGREGATE_ENABLE=1
@@ -165,6 +168,7 @@ python3 -m vllm.entrypoints.openai.api_server \
 			{
 				"kv_connector": "MooncakeConnectorStoreV1",
 				"kv_role": "kv_consumer",
+                "mooncake_rpc_port":"1"
 			}
 		]
 	}
@@ -223,6 +227,7 @@ export PYTHONPATH=$PYTHONPATH:/xxxxx/vllm
 export MOONCAKE_CONFIG_PATH="/xxxxxx/mooncake.json"
 export VLLM_USE_V1=1
 export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3
+export ACL_OP_INIT_MODE=1
 export ASCEND_TRANSPORT_PRINT=1
 # The environment variable above (ASCEND_TRANSPORT_PRINT) is the switch for Mooncake memory-swap logging, where 1 indicates enabled and 0 indicates disabled.
 export ASCEND_AGGREGATE_ENABLE=1
@@ -242,9 +247,10 @@ python3 -m vllm.entrypoints.openai.api_server \
    --kv-transfer-config \
    '{
 	"kv_connector": "MooncakeConnectorStoreV1",
-	"kv_role": "kv_producer",
+	"kv_role": "kv_both",
 	"kv_connector_extra_config": {
-		"use_layerwise": false
+		"use_layerwise": false,
+        "mooncake_rpc_port":"0"
 	}
 }' > mix.log 2>&1
 ```