fix: InternS1 doesn't recognize images; update image token for InternVL processor (#9381)
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
@@ -44,7 +44,7 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
|
||||
self.img_start_token_id = tokenizer.convert_tokens_to_ids(self.IMG_START_TOKEN)
|
||||
self.img_end_token_id = tokenizer.convert_tokens_to_ids(self.IMG_END_TOKEN)
|
||||
self.mm_tokens = MultimodalSpecialTokens(
|
||||
image_token="<image>",
|
||||
image_token="<IMG_CONTEXT>",
|
||||
image_token_id=tokenizer.convert_tokens_to_ids(self.IMG_CONTEXT_TOKEN),
|
||||
).build(_image_processor)
|
||||
|
||||
@@ -218,13 +218,18 @@ class InternVLImageProcessor(BaseMultimodalProcessor):
|
||||
|
||||
pixel_values = torch.cat(pixel_values, dim=0)
|
||||
|
||||
original_placeholder = "<<<__IMG_CONTEXT_PLACEHOLDER__>>>"
|
||||
input_text = input_text.replace(self.IMG_CONTEXT_TOKEN, original_placeholder)
|
||||
|
||||
for idx, num_patches in enumerate(num_patches_list):
|
||||
image_tokens = (
|
||||
self.IMG_START_TOKEN
|
||||
+ self.IMG_CONTEXT_TOKEN * self.num_image_token * num_patches
|
||||
+ self.IMG_END_TOKEN
|
||||
)
|
||||
input_text = input_text.replace("<image>", image_tokens, 1)
|
||||
input_text = input_text.replace(original_placeholder, image_tokens, 1)
|
||||
|
||||
input_text = input_text.replace(original_placeholder, self.IMG_CONTEXT_TOKEN)
|
||||
|
||||
input_ids = self.tokenizer(input_text, return_tensors="pt")[
|
||||
"input_ids"
|
||||
|
||||
Reference in New Issue
Block a user