From e25c57b3461b8116d533724013fbc9957716f92d Mon Sep 17 00:00:00 2001
From: knight0528 <166722435+knight0528@users.noreply.github.com>
Date: Mon, 15 Dec 2025 16:27:17 +0800
Subject: [PATCH] [Bugfix] Add support for PP intermediate value types in graph mode (#4902)

This PR adds support for handling intermediate value types in pipeline parallelism when running in graph mode.

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

---------

Signed-off-by: zhangshushun <3265779424@qq.com>
Co-authored-by: Jade Zheng
---
 vllm_ascend/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
index 1dad4a28..80299a05 100644
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -31,6 +31,7 @@ import torch_npu # noqa: F401
 from packaging.version import InvalidVersion, Version
 from torch_npu.npu.streams import Event
 from vllm.logger import logger
+from vllm.sequence import IntermediateTensors
 
 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
@@ -844,6 +845,13 @@ def weak_ref_tensors(
         return [weak_ref_tensor(t) for t in tensors]
     if isinstance(tensors, tuple):
         return tuple(weak_ref_tensor(t) for t in tensors)
+    # For IntermediateTensors used in pipeline parallelism
+    if isinstance(tensors, IntermediateTensors):
+        ret = IntermediateTensors({
+            key: weak_ref_tensor(val)
+            for key, val in tensors.tensors.items()
+        })
+        return ret
     raise ValueError("Invalid type for tensors")
 
 