vlm: remove redundant D2H (device-to-host) movement of multimodal (mm) feature tensors (#9987)
Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
This commit is contained in:
@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC):
|
||||
return_tensors="pt",
|
||||
**kwargs,
|
||||
)
|
||||
# move feature tensors to cpu
|
||||
for feature_name in self.FEATURE_NAMES:
|
||||
if feature_name in result and isinstance(
|
||||
result[feature_name], torch.Tensor
|
||||
):
|
||||
result[feature_name] = result[feature_name].to("cpu")
|
||||
if not self.server_args.keep_mm_feature_on_device:
|
||||
# move feature tensors to cpu
|
||||
for feature_name in self.FEATURE_NAMES:
|
||||
if feature_name in result and isinstance(
|
||||
result[feature_name], torch.Tensor
|
||||
):
|
||||
result[feature_name] = result[feature_name].to("cpu")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -381,6 +381,7 @@ class ServerArgs:
|
||||
disable_shared_experts_fusion: bool = False
|
||||
disable_chunked_prefix_cache: bool = False
|
||||
disable_fast_image_processor: bool = False
|
||||
keep_mm_feature_on_device: bool = False
|
||||
enable_return_hidden_states: bool = False
|
||||
scheduler_recv_interval: int = 1
|
||||
numa_node: Optional[List[int]] = None
|
||||
@@ -2213,6 +2214,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Adopt base image processor instead of fast image processor.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-mm-feature-on-device",
|
||||
action="store_true",
|
||||
help="Keep multimodal feature tensors on device after processing to save D2H copy.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-return-hidden-states",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user