refactor: bug fixes and refactor for vlm (#4661)

Author: Mick
Date: 2025-03-23 13:48:49 +08:00
Committed by: GitHub
Parent: ca75741e86
Commit: 11577cedb7
31 changed files with 770 additions and 735 deletions

View File

@@ -9,8 +9,6 @@ import PIL
 import torch
 from PIL.Image import Image
 from transformers import (
-    AutoImageProcessor,
-    AutoProcessor,
     BaseImageProcessor,
     BatchFeature,
     LlamaConfig,
@@ -20,6 +18,7 @@ from transformers import (
 )
 
 from transformers.image_utils import to_numpy_array
 
+from sglang.srt.configs.utils import register_image_processor, register_processor
 from sglang.srt.mm_utils import expand2square
 
@@ -625,5 +624,5 @@ class VLMImageProcessorConfig(PretrainedConfig):
         super().__init__(**kwargs)
 
-AutoProcessor.register(MultiModalityConfig, VLChatProcessor, exist_ok=True)
-AutoImageProcessor.register(VLMImageProcessorConfig, None, VLMImageProcessor, None)
+register_processor(MultiModalityConfig, VLChatProcessor)
+register_image_processor(MultiModalityConfig, VLMImageProcessor)
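
Note: the direct AutoProcessor.register / AutoImageProcessor.register calls are replaced by the register_processor / register_image_processor helpers from sglang.srt.configs.utils (the new file added below), which wrap the same Hugging Face registration calls.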

View File

@@ -460,6 +460,7 @@ def is_generation_model(model_architectures: List[str], is_embedding: bool = Fal
 
 
 multimodal_model_archs = [
+    "DeepseekVL2ForCausalLM",
     "LlavaLlamaForCausalLM",
     "LlavaQwenForCausalLM",
     "LlavaMistralForCausalLM",
@@ -472,7 +473,6 @@ multimodal_model_archs = [
     "Qwen2_5_VLForConditionalGeneration",
     "MiniCPMV",
     "MultiModalityCausalLM",
-    "DeepseekVL2ForCausalLM",
 ]
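
Note: "DeepseekVL2ForCausalLM" is moved to the top of multimodal_model_archs; the membership of the list is unchanged.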

View File

@@ -0,0 +1,25 @@
+from typing import Type
+
+from transformers import (
+    AutoImageProcessor,
+    AutoProcessor,
+    BaseImageProcessor,
+    PretrainedConfig,
+    ProcessorMixin,
+)
+
+
+def register_image_processor(
+    config: Type[PretrainedConfig], image_processor: Type[BaseImageProcessor]
+):
+    """
+    register customized hf image processor while removing hf impl
+    """
+    AutoImageProcessor.register(config, None, image_processor, None, exist_ok=True)
+
+
+def register_processor(config: Type[PretrainedConfig], processor: Type[ProcessorMixin]):
+    """
+    register customized hf processor while removing hf impl
+    """
+    AutoProcessor.register(config, processor, exist_ok=True)
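
For reference, a minimal usage sketch of the two helpers above. MyVLMConfig, MyVLMImageProcessor, and MyVLMProcessor are hypothetical placeholders invented for illustration; only register_processor and register_image_processor come from this commit.

from transformers import BaseImageProcessor, PretrainedConfig, ProcessorMixin

from sglang.srt.configs.utils import register_image_processor, register_processor


class MyVLMConfig(PretrainedConfig):
    # Hypothetical config class, for illustration only.
    model_type = "my_vlm"


class MyVLMImageProcessor(BaseImageProcessor):
    # Hypothetical image processor; a real one would implement preprocess().
    pass


class MyVLMProcessor(ProcessorMixin):
    # Hypothetical processor; a real one would declare its tokenizer and
    # image processor attributes.
    pass


# Bind the custom classes to the config type. Because the helpers pass
# exist_ok=True, re-registering over an existing Hugging Face mapping
# does not raise.
register_processor(MyVLMConfig, MyVLMProcessor)
register_image_processor(MyVLMConfig, MyVLMImageProcessor)

This mirrors the call sites in the first file above, where MultiModalityConfig is bound to VLChatProcessor and VLMImageProcessor.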