From 1de16ead8eecfec8903ec1b330b27a4fa2593c35 Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 20 Aug 2025 20:25:53 +0800 Subject: [PATCH] [main][bugfix] Modify the default value of enable_shared_expert_dp to false (#2457) ### What this PR does / why we need it? enable_shared_expert_dp is currently on by default. This optimization is currently only valid for DeepSeek series models. Enabling it by default affects the accuracy of the Qwen series models. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Use the parameter --additional_config='{"enable_shared_expert_dp": true}' - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d983769c41db224e0897fac2e9aefc5f57ad1122 Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- docs/source/user_guide/configuration/additional_config.md | 2 +- vllm_ascend/ascend_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user_guide/configuration/additional_config.md b/docs/source/user_guide/configuration/additional_config.md index 75d0149..31bae11 100644 --- a/docs/source/user_guide/configuration/additional_config.md +++ b/docs/source/user_guide/configuration/additional_config.md @@ -32,7 +32,7 @@ The following table lists the additional configuration options available in vLLM | `expert_map_path` | str | `None` | When using expert load balancing for the MOE model, an expert map path needs to be passed in. | | `chunked_prefill_for_mla` | bool | `False` | Whether to enable the fused operator-like chunked_prefill. | | `kv_cache_dtype` | str | `None` | When using the kv cache quantization method, kv cache dtype needs to be set, currently only int8 is supported. | -| `enable_shared_expert_dp` | bool | `True` | When the shared expert in DP, it has better performance but consumes more memory. When the memory is sensitive, this switch can be turned off manually. 
| +| `enable_shared_expert_dp` | bool | `False` | When the shared expert in DP, it has better performance but consumes more memory. Currently only DeepSeek series models are supported to use. | The details of each config option are as follows: diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py index 9b35578..3769bcb 100644 --- a/vllm_ascend/ascend_config.py +++ b/vllm_ascend/ascend_config.py @@ -48,7 +48,7 @@ class AscendConfig: self.chunked_prefill_for_mla = additional_config.get( "chunked_prefill_for_mla", False) self.enable_shared_expert_dp = additional_config.get( - "enable_shared_expert_dp", True + "enable_shared_expert_dp", False ) and not self.torchair_graph_config.enabled and vllm_config.parallel_config.enable_expert_parallel