refactor: unify names of the feature field of MultimodalDataItem (#8075)

2025-07-17 08:52:38 +08:00
parent c28ad1990d
commit 4395c87a9b
33 changed files with 66 additions and 83 deletions
--- a/python/sglang/srt/models/phi4mm.py
+++ b/python/sglang/srt/models/phi4mm.py
@@ -422,9 +422,7 @@ class Phi4MMForCausalLM(nn.Module):

    def get_image_feature(self, items: List[MultimodalDataItem]) -> torch.Tensor:
        dtype = next(self.vision_encoder.parameters()).dtype
-        pixel_values = torch.cat([item.pixel_values for item in items], dim=0).type(
-            dtype
-        )
+        pixel_values = torch.cat([item.feature for item in items], dim=0).type(dtype)
        image_attention_mask = torch.cat([item.image_emb_mask for item in items], dim=0)
        image_sizes = torch.cat([item.image_sizes for item in items], dim=0)
        image_embeds = self.vision_encoder(