model: adapt mllama4 to VisionAttention (#8512)

Co-authored-by: root <mickjagger19@icloud.com>
This commit is contained in:
Wenchen Lo
2025-08-02 00:39:40 -07:00
committed by GitHub
parent 4bec99ecd0
commit ea93079b30
6 changed files with 518 additions and 52 deletions

View File

@@ -14,7 +14,6 @@
"""Utilities for Huggingface Transformers."""
import contextlib
import logging
import os
import warnings
from pathlib import Path
@@ -45,7 +44,7 @@ from sglang.srt.configs import (
)
from sglang.srt.configs.internvl import InternVLChatConfig
from sglang.srt.connector import create_remote_connector
from sglang.srt.utils import is_remote_url, lru_cache_frozenset
from sglang.srt.utils import is_remote_url, logger, lru_cache_frozenset
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
ChatGLMConfig.model_type: ChatGLMConfig,
@@ -317,15 +316,31 @@ def get_processor(
if config.model_type not in {"llava", "clip"}:
kwargs["use_fast"] = use_fast
try:
processor = AutoProcessor.from_pretrained(
tokenizer_name,
*args,
trust_remote_code=trust_remote_code,
revision=revision,
**kwargs,
)
processor = AutoProcessor.from_pretrained(
tokenizer_name,
*args,
trust_remote_code=trust_remote_code,
revision=revision,
**kwargs,
)
except ValueError as e:
error_message = str(e)
if "does not have a slow version" in error_message:
logger.info(
f"Processor {tokenizer_name} does not have a slow version. Automatically use fast version"
)
kwargs["use_fast"] = True
processor = AutoProcessor.from_pretrained(
tokenizer_name,
*args,
trust_remote_code=trust_remote_code,
revision=revision,
**kwargs,
)
else:
raise e
tokenizer = get_tokenizer_from_processor(processor)
attach_additional_stop_token_ids(tokenizer)