[Bugfix] Add support for PP intermediate value types in graph mode (#4902)
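This PR adds support for `IntermediateTensors`, the intermediate value type used in
pipeline parallelism (PP), when running in graph mode: `weak_ref_tensors` now accepts an
`IntermediateTensors` and returns a new one whose entries are weak references to the
original tensors, instead of falling through to `ValueError("Invalid type for tensors")`.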
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: zhangshushun <3265779424@qq.com>
Co-authored-by: Jade Zheng <zheng.shoujian@outlook.com>
This commit is contained in:
@@ -31,6 +31,7 @@ import torch_npu # noqa: F401
|
||||
from packaging.version import InvalidVersion, Version
|
||||
from torch_npu.npu.streams import Event
|
||||
from vllm.logger import logger
|
||||
from vllm.sequence import IntermediateTensors
|
||||
|
||||
import vllm_ascend.envs as envs_ascend
|
||||
from vllm_ascend.ascend_config import get_ascend_config
|
||||
@@ -844,6 +845,13 @@ def weak_ref_tensors(
|
||||
return [weak_ref_tensor(t) for t in tensors]
|
||||
if isinstance(tensors, tuple):
|
||||
return tuple(weak_ref_tensor(t) for t in tensors)
|
||||
# For IntermediateTensors used in pipeline parallelism
|
||||
if isinstance(tensors, IntermediateTensors):
|
||||
ret = IntermediateTensors({
|
||||
key: weak_ref_tensor(val)
|
||||
for key, val in tensors.tensors.items()
|
||||
})
|
||||
return ret
|
||||
raise ValueError("Invalid type for tensors")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user