feat: support data parallel for deepseek (#1012)

### What this PR does / why we need it?

feat: support data parallel for deepseek

### Does this PR introduce _any_ user-facing change?

Yes, support dp for deepseek

### How was this patch tested?

```
export VLLM_ENABLE_MC2=0
export VLLM_USE_V1=1
export TASK_QUEUE_ENABLE=1
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
nohup python -m vllm.entrypoints.openai.api_server --model=/path/to/DeepSeek-R1-W8A8 \
    --quantization ascend \
    --served-model-name auto \
    --trust-remote-code \
    --distributed-executor-backend=mp \
    --port 8006 \
    -tp=8 \
    -dp=2 \
    --max-num-seqs 24 \
    --max-model-len 4096 \
    --max-num-batched-tokens 4096 \
    --block-size 128 \
    -O 0 \
    --no-enable-prefix-caching \
    --additional-config '{"torchair_graph_batch_sizes":[24],"expert_tensor_parallel_size":16,"ascend_scheduler_config":{},"enable_graph_mode":true}' \
    --gpu-memory-utilization 0.95 &> run.log & disown
```

Signed-off-by: boying <897013703@qq.com>
2025-06-04 18:31:41 +08:00
parent 517811449e
commit da9acfca60
8 changed files with 212 additions and 88 deletions
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -138,7 +138,7 @@ class NPUPlatform(Platform):

            # Calculate expert parallel size based on world size
            parallel_config.expert_parallel_size = (
-                parallel_config.world_size //
+                parallel_config.world_size_across_dp //
                parallel_config.expert_tensor_parallel_size)

        if model_config is None:
@@ -167,6 +167,8 @@ class NPUPlatform(Platform):
                        raise NotImplementedError(
                            "enable_graph_mode only works with deepseek model."
                        )
+                # Set compilation level to NO_COMPILATION to disable ACL Graph
+                compilation_config.level = CompilationLevel.NO_COMPILATION

        elif envs.VLLM_USE_V1 and model_config is not None and not enforce_eager:
            model_type = model_config.hf_config.model_type