From 1de16ead8eecfec8903ec1b330b27a4fa2593c35 Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 20 Aug 2025 20:25:53 +0800 Subject: [PATCH] [main][bugfix] Modify the default value of enable_shared_expert_dp to false (#2457) ### What this PR does / why we need it? enable_shared_expert_dp is currently on by default. This optimization is currently only valid for DeepSeek series models. Enabling it by default affects the accuracy of the Qwen series models. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Use the parameter --additional_config='{"enable_shared_expert_dp": true}' - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d983769c41db224e0897fac2e9aefc5f57ad1122 Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- docs/source/user_guide/configuration/additional_config.md | 2 +- vllm_ascend/ascend_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user_guide/configuration/additional_config.md b/docs/source/user_guide/configuration/additional_config.md index 75d0149..31bae11 100644 --- a/docs/source/user_guide/configuration/additional_config.md +++ b/docs/source/user_guide/configuration/additional_config.md @@ -32,7 +32,7 @@ The following table lists the additional configuration options available in vLLM | `expert_map_path` | str | `None` | When using expert load balancing for the MOE model, an expert map path needs to be passed in. | | `chunked_prefill_for_mla` | bool | `False` | Whether to enable the fused operator-like chunked_prefill. | | `kv_cache_dtype` | str | `None` | When using the kv cache quantization method, kv cache dtype needs to be set, currently only int8 is supported. | -| `enable_shared_expert_dp` | bool | `True` | When the shared expert in DP, it has better performance but consumes more memory. When the memory is sensitive, this switch can be turned off manually. 
| +| `enable_shared_expert_dp` | bool | `False` | When the shared expert in DP, it has better performance but consumes more memory. Currently only DeepSeek series models are supported to use. | The details of each config option are as follows: diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py index 9b35578..3769bcb 100644 --- a/vllm_ascend/ascend_config.py +++ b/vllm_ascend/ascend_config.py @@ -48,7 +48,7 @@ class AscendConfig: self.chunked_prefill_for_mla = additional_config.get( "chunked_prefill_for_mla", False) self.enable_shared_expert_dp = additional_config.get( - "enable_shared_expert_dp", True + "enable_shared_expert_dp", False ) and not self.torchair_graph_config.enabled and vllm_config.parallel_config.enable_expert_parallel