vlm: remove redundant D2H (device-to-host) movement of multimodal (mm) feature tensors (#9987)
Co-authored-by: Xiang (Kevin) Li <lik@nvidia.com>
This commit is contained in:
@@ -381,6 +381,7 @@ class ServerArgs:
|
||||
disable_shared_experts_fusion: bool = False
|
||||
disable_chunked_prefix_cache: bool = False
|
||||
disable_fast_image_processor: bool = False
|
||||
keep_mm_feature_on_device: bool = False
|
||||
enable_return_hidden_states: bool = False
|
||||
scheduler_recv_interval: int = 1
|
||||
numa_node: Optional[List[int]] = None
|
||||
@@ -2213,6 +2214,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Adopt base image processor instead of fast image processor.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-mm-feature-on-device",
|
||||
action="store_true",
|
||||
help="Keep multimodal feature tensors on device after processing to save D2H copy.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-return-hidden-states",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user