refactor: unify names of the feature field of MultimodalDataItem (#8075)

This commit is contained in:
Mick
2025-07-17 08:52:38 +08:00
committed by GitHub
parent c28ad1990d
commit 4395c87a9b
33 changed files with 66 additions and 83 deletions

View File

@@ -422,9 +422,7 @@ class Phi4MMForCausalLM(nn.Module):
def get_image_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
dtype = next(self.vision_encoder.parameters()).dtype
-pixel_values = torch.cat([item.pixel_values for item in items], dim=0).type(
-dtype
-)
+pixel_values = torch.cat([item.feature for item in items], dim=0).type(dtype)
image_attention_mask = torch.cat([item.image_emb_mask for item in items], dim=0)
image_sizes = torch.cat([item.image_sizes for item in items], dim=0)
image_embeds = self.vision_encoder(