[Bugfix] Add support for PP intermediate value types in graph mode (#4902)

This PR adds support for handling intermediate value types in pipeline
parallelism when running in graph mode.


- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: zhangshushun <3265779424@qq.com>
Co-authored-by: Jade Zheng <zheng.shoujian@outlook.com>
This commit is contained in:
knight0528
2025-12-15 16:27:17 +08:00
committed by GitHub
parent e16444f21f
commit e25c57b346

View File

@@ -31,6 +31,7 @@ import torch_npu # noqa: F401
from packaging.version import InvalidVersion, Version
from torch_npu.npu.streams import Event
from vllm.logger import logger
from vllm.sequence import IntermediateTensors
import vllm_ascend.envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
@@ -844,6 +845,13 @@ def weak_ref_tensors(
return [weak_ref_tensor(t) for t in tensors]
if isinstance(tensors, tuple):
return tuple(weak_ref_tensor(t) for t in tensors)
# For IntermediateTensors used in pipeline parallelism
if isinstance(tensors, IntermediateTensors):
ret = IntermediateTensors({
key: weak_ref_tensor(val)
for key, val in tensors.tensors.items()
})
return ret
raise ValueError("Invalid type for tensors")