vlm: make the d2h (device-to-host) copy of multimodal (mm) feature tensors optional, skipping it when keep_mm_feature_on_device is set (#9987)

Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
This commit is contained in:
Kevin Xiang Li
2025-09-17 15:00:39 -07:00
committed by GitHub
parent 564050766d
commit de28f8e741
2 changed files with 13 additions and 6 deletions

View File

@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC):
return_tensors="pt",
**kwargs,
)
# move feature tensors to cpu
for feature_name in self.FEATURE_NAMES:
if feature_name in result and isinstance(
result[feature_name], torch.Tensor
):
result[feature_name] = result[feature_name].to("cpu")
if not self.server_args.keep_mm_feature_on_device:
# move feature tensors to cpu
for feature_name in self.FEATURE_NAMES:
if feature_name in result and isinstance(
result[feature_name], torch.Tensor
):
result[feature_name] = result[feature_name].to("cpu")
return result