Refactor vlm embedding routine to use precomputed feature (#6543)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
@@ -499,12 +499,6 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module):
|
||||
return pattern.pad_input_tokens(input_ids, mm_inputs)
|
||||
|
||||
def get_image_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
|
||||
if any(item.precomputed_features is not None for item in items):
|
||||
if not all(item.precomputed_features is not None for item in items):
|
||||
raise NotImplementedError(
|
||||
"MM inputs where only some items are precomputed."
|
||||
)
|
||||
return torch.concat([item.precomputed_features for item in items])
|
||||
# in qwen-vl, last dim is the same
|
||||
pixel_values = torch.cat([item.pixel_values for item in items], dim=0).type(
|
||||
self.visual.dtype
|
||||
|
||||
Reference in New Issue
Block a user