update
This commit is contained in:
84
vllm/transformers_utils/processors/bagel.py
Normal file
84
vllm/transformers_utils/processors/bagel.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
# Copyright 2025 Bytedance Ltd. and/or its affiliates.
|
||||
"""BAGEL processor for image and text inputs."""
|
||||
|
||||
from transformers import AutoProcessor
|
||||
from transformers.feature_extraction_utils import BatchFeature
|
||||
from transformers.image_utils import ImageInput
|
||||
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
|
||||
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
|
||||
|
||||
|
||||
class BagelProcessorKwargs(ProcessingKwargs, total=False): # type: ignore[call-arg]
|
||||
_defaults = {
|
||||
"images_kwargs": {
|
||||
"return_tensors": "pt",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class BagelProcessor(ProcessorMixin):
|
||||
"""
|
||||
Constructs a BAGEL processor which wraps a
|
||||
SigLIP image processor and a Qwen2 tokenizer.
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "SiglipImageProcessor"
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
text: TextInput
|
||||
| PreTokenizedInput
|
||||
| list[TextInput]
|
||||
| list[PreTokenizedInput] = None,
|
||||
images: ImageInput = None,
|
||||
**kwargs: Unpack[BagelProcessorKwargs],
|
||||
):
|
||||
"""
|
||||
Main method to prepare for the model one or several sequences(s) and image(s).
|
||||
"""
|
||||
output_kwargs = self._merge_kwargs(
|
||||
BagelProcessorKwargs,
|
||||
tokenizer_init_kwargs=self.tokenizer.init_kwargs,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if images is not None:
|
||||
# Process images with the image processor
|
||||
pixel_values = self.image_processor(
|
||||
images, **output_kwargs["images_kwargs"]
|
||||
)
|
||||
else:
|
||||
pixel_values = {}
|
||||
|
||||
text_inputs = (
|
||||
self.tokenizer(text, **output_kwargs["text_kwargs"])
|
||||
if text is not None
|
||||
else {}
|
||||
)
|
||||
|
||||
return BatchFeature(data={**pixel_values, **text_inputs})
|
||||
|
||||
def batch_decode(self, *args, **kwargs):
|
||||
"""
|
||||
This method forwards all its arguments to Qwen2TokenizerFast's batch_decode.
|
||||
"""
|
||||
return self.tokenizer.batch_decode(*args, **kwargs)
|
||||
|
||||
def decode(self, *args, **kwargs):
|
||||
"""
|
||||
This method forwards all its arguments to Qwen2TokenizerFast's decode.
|
||||
"""
|
||||
return self.tokenizer.decode(*args, **kwargs)
|
||||
|
||||
@property
|
||||
def model_input_names(self):
|
||||
tokenizer_input_names = self.tokenizer.model_input_names
|
||||
image_processor_input_names = self.image_processor.model_input_names
|
||||
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
||||
|
||||
|
||||
AutoProcessor.register("BagelProcessor", BagelProcessor)
|
||||
Reference in New Issue
Block a user