vlm: support video as an input modality (#5888)

This commit is contained in:
Mick
2025-07-10 14:48:35 +08:00
committed by GitHub
parent 4ed57807c2
commit b5e3d6031c
42 changed files with 887 additions and 524 deletions

View File

@@ -787,7 +787,9 @@ class LlavaForConditionalGeneration(LlavaBaseForCausalLM):
forward_batch=forward_batch,
get_embedding=get_embedding,
language_model=self.language_model,
-image_data_embedding_func=self.get_image_feature,
+data_embedding_funcs={
+Modality.IMAGE: self.get_image_feature,
+},
placeholder_tokens=None, # using mm_item.pad_value
positions=positions,
)