vlm: remove redundant D2H (device-to-host) movement of multimodal (mm) feature tensors (#9987)
Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
This commit is contained in:
@@ -241,12 +241,13 @@ class BaseMultimodalProcessor(ABC):
|
|||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
# move feature tensors to cpu
|
if not self.server_args.keep_mm_feature_on_device:
|
||||||
for feature_name in self.FEATURE_NAMES:
|
# move feature tensors to cpu
|
||||||
if feature_name in result and isinstance(
|
for feature_name in self.FEATURE_NAMES:
|
||||||
result[feature_name], torch.Tensor
|
if feature_name in result and isinstance(
|
||||||
):
|
result[feature_name], torch.Tensor
|
||||||
result[feature_name] = result[feature_name].to("cpu")
|
):
|
||||||
|
result[feature_name] = result[feature_name].to("cpu")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
@@ -381,6 +381,7 @@ class ServerArgs:
|
|||||||
disable_shared_experts_fusion: bool = False
|
disable_shared_experts_fusion: bool = False
|
||||||
disable_chunked_prefix_cache: bool = False
|
disable_chunked_prefix_cache: bool = False
|
||||||
disable_fast_image_processor: bool = False
|
disable_fast_image_processor: bool = False
|
||||||
|
keep_mm_feature_on_device: bool = False
|
||||||
enable_return_hidden_states: bool = False
|
enable_return_hidden_states: bool = False
|
||||||
scheduler_recv_interval: int = 1
|
scheduler_recv_interval: int = 1
|
||||||
numa_node: Optional[List[int]] = None
|
numa_node: Optional[List[int]] = None
|
||||||
@@ -2213,6 +2214,11 @@ class ServerArgs:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Adopt base image processor instead of fast image processor.",
|
help="Adopt base image processor instead of fast image processor.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--keep-mm-feature-on-device",
|
||||||
|
action="store_true",
|
||||||
|
help="Keep multimodal feature tensors on device after processing to save D2H copy.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--enable-return-hidden-states",
|
"--enable-return-hidden-states",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Reference in New Issue
Block a user