[Bugfix] Add support for PP intermediate value types in graph mode (#4902)

This PR adds support for handling intermediate value types in pipeline
parallelism when running in graph mode.


- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: zhangshushun <3265779424@qq.com>
Co-authored-by: Jade Zheng <zheng.shoujian@outlook.com>
This commit is contained in:
knight0528
2025-12-15 16:27:17 +08:00
committed by GitHub
parent e16444f21f
commit e25c57b346

View File

@@ -31,6 +31,7 @@ import torch_npu # noqa: F401
from packaging.version import InvalidVersion, Version
from torch_npu.npu.streams import Event
from vllm.logger import logger
from vllm.sequence import IntermediateTensors
import vllm_ascend.envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
@@ -844,6 +845,13 @@ def weak_ref_tensors(
return [weak_ref_tensor(t) for t in tensors]
if isinstance(tensors, tuple):
return tuple(weak_ref_tensor(t) for t in tensors)
# For IntermediateTensors used in pipeline parallelism
if isinstance(tensors, IntermediateTensors):
ret = IntermediateTensors({
key: weak_ref_tensor(val)
for key, val in tensors.tensors.items()
})
return ret
raise ValueError("Invalid type for tensors")