From 600bf80c6d8657b0acca5936410c4543a2de13d7 Mon Sep 17 00:00:00 2001 From: Nagisa125 <166619298+Nagisa125@users.noreply.github.com> Date: Thu, 30 Apr 2026 10:35:48 +0800 Subject: [PATCH] [CI] Fix the error caused by layer_sharding in dsv32 (#8719) ### What this PR does / why we need it? This PR fixes the error in DSV32 mixed deployment caused by enabling layer_sharding. - Currently, mixed deployment no longer supports enabling layer_sharding. Therefore, it has been removed from the service-oriented configuration. - The error "RPC call to sample_tokens timed out" occurred because the dshm size limit was set too small. Therefore, it was increased to 512 Gi. ### Does this PR introduce _any_ user-facing change? no. ### How was this patch tested? The nightly test has passed. Signed-off-by: wyh145 <1987244901@qq.com> --- .../multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml | 2 -- .../nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml index a7851471..771b70a0 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml @@ -39,7 +39,6 @@ deployment: --trust-remote-code --speculative-config '{"num_speculative_tokens": 3, "method":"deepseek_mtp"}' --compilation-config '{"cudagraph_capture_sizes": [8, 16, 24, 32, 40, 48], "cudagraph_mode": "FULL_DECODE_ONLY"}' - --additional-config '{"layer_sharding": ["q_b_proj", "o_proj"]}' --tokenizer-mode deepseek_v32 --reasoning-parser deepseek_v3 @@ -64,7 +63,6 @@ deployment: --trust-remote-code --speculative-config '{"num_speculative_tokens": 3, "method":"deepseek_mtp"}' --compilation-config '{"cudagraph_capture_sizes": [8, 16, 24, 32, 40, 48], "cudagraph_mode": "FULL_DECODE_ONLY"}' - --additional-config 
'{"layer_sharding": ["q_b_proj", "o_proj"]}' --tokenizer-mode deepseek_v32 --reasoning-parser deepseek_v3 benchmarks: diff --git a/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml b/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml index 1ab0b3ea..7931c974 100644 --- a/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml +++ b/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml @@ -38,8 +38,6 @@ test_cases: - '{"cudagraph_capture_sizes":[4, 8, 16, 20, 24, 28, 32], "cudagraph_mode":"FULL_DECODE_ONLY"}' - "--speculative-config" - '{"num_speculative_tokens": 3, "method":"deepseek_mtp"}' - - "--additional-config" - - '{"layer_sharding": ["q_b_proj", "o_proj"]}' - "--reasoning-parser" - "deepseek_v3" - "--tokenizer_mode"