From 1ab6cd49358e0ea91ae277703f054f1df0dce2e6 Mon Sep 17 00:00:00 2001 From: Zetong Li <48438720+slippersss@users.noreply.github.com> Date: Wed, 21 Jan 2026 09:24:33 +0800 Subject: [PATCH] [Bugfix] Fix setting of `speculative_config.enforce_eager` for dsv32 (#5945) ### What this PR does / why we need it? This PR aims to fix the setting of `speculative_config.enforce_eager` in deepseek v3.2 mtp. The point is that vllm sets `speculative_config.enforce_eager` as True if using deepseek_v32 with mtp. Since we support graph mode, we simply ignore it here. However, this fix will also implicitly ignore the user setting of `speculative_config.enforce_eager`; we need to take care and remove it once vllm supports this feature. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? by ci - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060 Signed-off-by: Zetong Li --- vllm_ascend/platform.py | 15 +++++++++++++++ vllm_ascend/spec_decode/mtp_proposer.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index e20ab2b9..db47ab21 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -412,6 +412,21 @@ class NPUPlatform(Platform): os.environ["PYTORCH_NPU_ALLOC_CONF"] = npu_alloc_configs logger.info("Set PYTORCH_NPU_ALLOC_CONF=%s", npu_alloc_configs) + # NOTE: vllm sets `speculative_config.enforce_eager` as True if using + # deepseek_v32 with mtp. Since we support graph mode, we simply ignore + # it here. However, this fix will also implicitly ignore user setting of + # `speculative_config.enforce_eager`, we need to take care and remove it + # once vllm supports this feature. 
+ speculative_config = vllm_config.speculative_config + if ( + model_config + and speculative_config + and hasattr(model_config.hf_text_config, "model_type") + and model_config.hf_text_config.model_type == "deepseek_v32" + and speculative_config.enforce_eager + ): + speculative_config.enforce_eager = False + @classmethod def import_kernels(cls) -> None: # Directly importing vllm_ascend_C prevents ASCEND_RT_VISIBLE_DEVICES diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py index eab2846d..a5ce1e57 100644 --- a/vllm_ascend/spec_decode/mtp_proposer.py +++ b/vllm_ascend/spec_decode/mtp_proposer.py @@ -246,7 +246,7 @@ class MtpProposer(EagleProposer): -1]: num_input_tokens = self.vllm_config.pad_for_cudagraph( num_scheduled_tokens) - elif self.use_aclgraph and num_tokens <= self.runner.cudagraph_batch_sizes[ + elif self.use_aclgraph and num_tokens <= self.runner.cudagraph_batch_sizes[ -1]: # Acl graph mode, add padding to the batch size num_input_tokens = self.vllm_config.pad_for_cudagraph(num_tokens)