model: adapt mllama4 to VisionAttention (#8512)
Co-authored-by: root <mickjagger19@icloud.com>
This commit is contained in:
@@ -12,7 +12,6 @@ import torch
|
||||
from PIL import Image
|
||||
from transformers import BaseImageProcessorFast
|
||||
|
||||
from sglang.srt.managers.mm_utils import TransportProxyTensor
|
||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||
from sglang.srt.utils import load_audio, load_image, load_video, logger
|
||||
|
||||
@@ -218,8 +217,10 @@ class BaseMultimodalProcessor(ABC):
|
||||
kwargs["audio"] = audios
|
||||
|
||||
processor = self._processor
|
||||
- if hasattr(processor, "image_processor") and isinstance(
-     processor.image_processor, BaseImageProcessorFast
+ if (
+     hasattr(processor, "image_processor")
+     and isinstance(processor.image_processor, BaseImageProcessorFast)
+     and not self.server_args.disable_fast_image_processor
  ):
|
||||
kwargs["device"] = "cuda"
|
||||
result = processor.__call__(
|
||||
|
||||
Reference in New Issue
Block a user