From 57b9f021853a156deaadbabb3c62716815e21e5a Mon Sep 17 00:00:00 2001 From: Li Wang Date: Wed, 6 Aug 2025 19:48:10 +0800 Subject: [PATCH] [Bugfix] Fix disaggregated pd error (#2242) ### What this PR does / why we need it? Fix the error that `envs_ascend` has no attribute `VLLM_ASCEND_ENABLE_CHUNK_MC2` by removing the unneeded `is_kv_producer` / `is_kv_consumer` early-return branches from `model_runner_v1.py`. - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9edd1db02bc6dce6da503503a373657f3466a78b --------- Signed-off-by: wangli --- vllm_ascend/worker/model_runner_v1.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 2569c7e..3242fff 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -646,22 +646,6 @@ class NPUModelRunner(LoRAModelRunnerMixin): return maybe_padded_num_tokens, None, with_prefill, enable_dbo return num_tokens, None, with_prefill, enable_dbo - if self.is_kv_producer and not envs_ascend.VLLM_ASCEND_ENABLE_CHUNK_MC2: - num_tokens_across_dp = torch.tensor([num_tokens] * self.dp_size, - device="cpu", - dtype=torch.int32) - return num_tokens, num_tokens_across_dp, True, enable_dbo - - if self.is_kv_consumer and self.torchair_graph_enabled and len( - self.torchair_graph_batch_sizes - ) == 1 and not self.in_profile_run: - max_num_decode_tokens = self.torchair_graph_batch_sizes[0] - num_tokens_across_dp = torch.tensor([max_num_decode_tokens] * - self.dp_size, - device="cpu", - dtype=torch.int32) - return max_num_decode_tokens, num_tokens_across_dp, False, enable_dbo - maybe_padded_num_tokens = num_tokens num_tokens_across_dp, with_prefill, enable_dbo = self._get_forward_metadata_across_dp( num_tokens, with_prefill, enable_dbo)