From 30e3d86b0f49c68352f24b4ac8da2988a2f1d7fc Mon Sep 17 00:00:00 2001
From: xuyexiong <xuyexiong@huawei.com>
Date: Fri, 17 Oct 2025 09:42:48 +0800
Subject: [PATCH] Revert "[BUGFIX] Mtp torchair pd fix (#3449)" (#3500)

This reverts commit b0ae203e72d87985314d583e211dddca6f351958.

### What this PR does / why we need it?
The fix is not ready yet: it conflicts with #3411, so it needs to be
reverted first. The underlying issue will be fixed later.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Signed-off-by: xuyexiong <xuyexiong@huawei.com>
---
 vllm_ascend/torchair/torchair_model_runner.py | 29 +++----------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/vllm_ascend/torchair/torchair_model_runner.py b/vllm_ascend/torchair/torchair_model_runner.py
index e6ef03e..b29fdc2 100644
--- a/vllm_ascend/torchair/torchair_model_runner.py
+++ b/vllm_ascend/torchair/torchair_model_runner.py
@@ -452,31 +452,10 @@ class NPUTorchairModelRunner(NPUModelRunner):
                 self.torchair_graph_batch_sizes.append(self.max_num_reqs)
 
         # padded max number tokens = max_num_req * decode_token_per_req
-        if self.decode_token_per_req > 1:
-            # pd disaggregation scenario need redundant_batch_sizes to avoid each batch's seq_len exceed 16 tokens
-            if self.is_kv_consumer:
-                FIA_SEQ_LEN_LIMIT = 16
-                self.torchair_graph_batch_sizes = [
-                    (graph_batch_size +
-                     math.ceil(graph_batch_size / FIA_SEQ_LEN_LIMIT) +
-                     math.ceil(graph_batch_size * self.decode_token_per_req /
-                               FIA_SEQ_LEN_LIMIT / FIA_SEQ_LEN_LIMIT)) *
-                    self.decode_token_per_req
-                    for graph_batch_size in self.torchair_graph_batch_sizes
-                ]
-                new_max_num_reqs = math.ceil(
-                    max(self.torchair_graph_batch_sizes) /
-                    self.decode_token_per_req)
-                if self.max_num_reqs < new_max_num_reqs:
-                    logger.warning(
-                        f"max_num_reqs is updated to {new_max_num_reqs}")
-                    self.max_num_reqs = new_max_num_reqs
-                    self.scheduler_config.max_num_seqs = new_max_num_reqs
-            else:
-                self.torchair_graph_batch_sizes = [
-                    graph_batch_size * self.decode_token_per_req
-                    for graph_batch_size in self.torchair_graph_batch_sizes
-                ]
+        self.torchair_graph_batch_sizes = [
+            graph_batch_size * self.decode_token_per_req
+            for graph_batch_size in self.torchair_graph_batch_sizes
+        ]
 
         # NOTE: when enable_expert_parallel on A3, we need to check if `graph_batch_size` is divisible by `tp_size`
         # Because we use x_active_mask for dispatch/combine op on A3, which requires that input shape should be same