Move multimodal processors into a separate folder (#7581)
This commit is contained in:
@@ -1,8 +0,0 @@
|
|||||||
# COPIED FROM DeepGEMM
|
|
||||||
def align(x: int, y: int) -> int:
|
|
||||||
return ceil_div(x, y) * y
|
|
||||||
|
|
||||||
|
|
||||||
# COPIED FROM DeepGEMM
|
|
||||||
def ceil_div(x: int, y: int) -> int:
|
|
||||||
return (x + y - 1) // y
|
|
||||||
@@ -19,7 +19,7 @@ from transformers import (
|
|||||||
from transformers.image_utils import to_numpy_array
|
from transformers.image_utils import to_numpy_array
|
||||||
|
|
||||||
from sglang.srt.configs.utils import register_image_processor, register_processor
|
from sglang.srt.configs.utils import register_image_processor, register_processor
|
||||||
from sglang.srt.mm_utils import expand2square
|
from sglang.srt.multimodal.mm_utils import expand2square
|
||||||
|
|
||||||
|
|
||||||
class DictToObject(dict):
|
class DictToObject(dict):
|
||||||
|
|||||||
@@ -4,9 +4,8 @@ from typing import List, Optional
|
|||||||
import torch
|
import torch
|
||||||
import triton
|
import triton
|
||||||
|
|
||||||
from sglang.math_utils import ceil_div
|
|
||||||
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
|
from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
|
||||||
from sglang.srt.utils import dispose_tensor, is_cuda
|
from sglang.srt.utils import ceil_div, dispose_tensor, is_cuda
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import torch
|
|||||||
import triton
|
import triton
|
||||||
import triton.language as tl
|
import triton.language as tl
|
||||||
|
|
||||||
from sglang.math_utils import ceil_div
|
|
||||||
from sglang.srt.layers.moe.topk import select_experts
|
from sglang.srt.layers.moe.topk import select_experts
|
||||||
from sglang.srt.layers.quantization.fp8_kernel import (
|
from sglang.srt.layers.quantization.fp8_kernel import (
|
||||||
per_token_group_quant_fp8,
|
per_token_group_quant_fp8,
|
||||||
@@ -25,6 +24,7 @@ from sglang.srt.layers.quantization.int8_kernel import (
|
|||||||
sglang_per_token_group_quant_int8,
|
sglang_per_token_group_quant_int8,
|
||||||
)
|
)
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
|
ceil_div,
|
||||||
cpu_has_amx_support,
|
cpu_has_amx_support,
|
||||||
direct_register_custom_op,
|
direct_register_custom_op,
|
||||||
get_bool_env_var,
|
get_bool_env_var,
|
||||||
@@ -32,7 +32,6 @@ from sglang.srt.utils import (
|
|||||||
is_cpu,
|
is_cpu,
|
||||||
is_cuda,
|
is_cuda,
|
||||||
is_hip,
|
is_hip,
|
||||||
log_info_on_rank0,
|
|
||||||
next_power_of_2,
|
next_power_of_2,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -23,9 +23,9 @@ import torch
|
|||||||
import triton
|
import triton
|
||||||
import triton.language as tl
|
import triton.language as tl
|
||||||
|
|
||||||
from sglang.math_utils import align
|
|
||||||
from sglang.srt.layers.quantization import deep_gemm_wrapper
|
from sglang.srt.layers.quantization import deep_gemm_wrapper
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
|
align,
|
||||||
direct_register_custom_op,
|
direct_register_custom_op,
|
||||||
get_device_core_count,
|
get_device_core_count,
|
||||||
get_device_name,
|
get_device_name,
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
from typing import Callable, List, Optional, Tuple
|
from typing import Callable, List, Optional, Tuple
|
||||||
|
|
||||||
import einops
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.math_utils import align
|
|
||||||
from sglang.srt.layers.quantization import deep_gemm_wrapper
|
from sglang.srt.layers.quantization import deep_gemm_wrapper
|
||||||
from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8
|
from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8
|
||||||
from sglang.srt.layers.utils import is_sm100_supported
|
from sglang.srt.layers.utils import is_sm100_supported
|
||||||
@@ -27,6 +25,7 @@ from sglang.srt.layers.quantization.fp8_kernel import (
|
|||||||
w8a8_block_fp8_matmul_triton,
|
w8a8_block_fp8_matmul_triton,
|
||||||
)
|
)
|
||||||
from sglang.srt.utils import (
|
from sglang.srt.utils import (
|
||||||
|
align,
|
||||||
get_bool_env_var,
|
get_bool_env_var,
|
||||||
get_cuda_version,
|
get_cuda_version,
|
||||||
get_device_capability,
|
get_device_capability,
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from dataclasses import dataclass, field
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from sglang.srt.mm_utils import has_valid_data
|
from sglang.srt.multimodal.mm_utils import has_valid_data
|
||||||
|
|
||||||
# handle serialization of Image for pydantic
|
# handle serialization of Image for pydantic
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
|||||||
@@ -2,8 +2,6 @@
|
|||||||
Multi-modality utils
|
Multi-modality utils
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import dataclasses
|
|
||||||
import logging
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Callable, List, Optional, Tuple
|
from typing import Callable, List, Optional, Tuple
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,7 @@ import logging
|
|||||||
import pkgutil
|
import pkgutil
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
BaseMultimodalProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -29,7 +27,7 @@ def get_dummy_processor():
|
|||||||
|
|
||||||
@lru_cache()
|
@lru_cache()
|
||||||
def import_processors():
|
def import_processors():
|
||||||
package_name = "sglang.srt.managers.multimodal_processors"
|
package_name = "sglang.srt.multimodal.processors"
|
||||||
package = importlib.import_module(package_name)
|
package = importlib.import_module(package_name)
|
||||||
for _, name, ispkg in pkgutil.iter_modules(package.__path__, package_name + "."):
|
for _, name, ispkg in pkgutil.iter_modules(package.__path__, package_name + "."):
|
||||||
if not ispkg:
|
if not ispkg:
|
||||||
|
|||||||
@@ -41,16 +41,16 @@ from sglang.srt.managers.schedule_batch import (
|
|||||||
MultimodalDataItem,
|
MultimodalDataItem,
|
||||||
MultimodalInputs,
|
MultimodalInputs,
|
||||||
)
|
)
|
||||||
from sglang.srt.mm_utils import (
|
|
||||||
get_anyres_image_grid_shape,
|
|
||||||
unpad_image,
|
|
||||||
unpad_image_shape,
|
|
||||||
)
|
|
||||||
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
|
||||||
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
from sglang.srt.model_loader.weight_utils import default_weight_loader
|
||||||
from sglang.srt.models.llama import LlamaForCausalLM
|
from sglang.srt.models.llama import LlamaForCausalLM
|
||||||
from sglang.srt.models.mistral import MistralForCausalLM
|
from sglang.srt.models.mistral import MistralForCausalLM
|
||||||
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
from sglang.srt.models.qwen2 import Qwen2ForCausalLM
|
||||||
|
from sglang.srt.multimodal.mm_utils import (
|
||||||
|
get_anyres_image_grid_shape,
|
||||||
|
unpad_image,
|
||||||
|
unpad_image_shape,
|
||||||
|
)
|
||||||
from sglang.srt.utils import add_prefix, flatten_nested_list, logger
|
from sglang.srt.utils import add_prefix, flatten_nested_list, logger
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.clip import CLIPModel
|
from sglang.srt.models.clip import CLIPModel
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
from sglang.srt.utils import load_image
|
from sglang.srt.utils import load_image
|
||||||
|
|
||||||
|
|
||||||
@@ -20,12 +20,12 @@ from typing import List, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM
|
|
||||||
|
|
||||||
|
|
||||||
class DeepseekVL2ImageProcessor(BaseMultimodalProcessor):
|
class DeepseekVL2ImageProcessor(BaseMultimodalProcessor):
|
||||||
@@ -4,11 +4,9 @@ from typing import Dict, List, Union
|
|||||||
from sglang.srt.managers.multimodal_processor import (
|
from sglang.srt.managers.multimodal_processor import (
|
||||||
BaseMultimodalProcessor as SGLangBaseProcessor,
|
BaseMultimodalProcessor as SGLangBaseProcessor,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
MultimodalSpecialTokens,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.gemma3_mm import Gemma3ForConditionalGeneration
|
from sglang.srt.models.gemma3_mm import Gemma3ForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
|
||||||
|
|
||||||
# Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma3/image_processing_gemma3_fast.py
|
# Copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma3/image_processing_gemma3_fast.py
|
||||||
# will be removed in the future
|
# will be removed in the future
|
||||||
@@ -18,10 +18,8 @@ from typing import Dict, List, Optional, Union
|
|||||||
from sglang.srt.managers.multimodal_processor import (
|
from sglang.srt.managers.multimodal_processor import (
|
||||||
BaseMultimodalProcessor as SGLangBaseProcessor,
|
BaseMultimodalProcessor as SGLangBaseProcessor,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
MultimodalSpecialTokens,
|
|
||||||
)
|
|
||||||
from sglang.srt.models.gemma3n_mm import Gemma3nForConditionalGeneration
|
from sglang.srt.models.gemma3n_mm import Gemma3nForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
|
||||||
|
|
||||||
|
|
||||||
class Gemma3nSGLangProcessor(SGLangBaseProcessor):
|
class Gemma3nSGLangProcessor(SGLangBaseProcessor):
|
||||||
@@ -5,12 +5,12 @@ import torch
|
|||||||
from decord import VideoReader, cpu
|
from decord import VideoReader, cpu
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.internvl import InternVLChatModel
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.internvl import InternVLChatModel
|
|
||||||
|
|
||||||
|
|
||||||
class InternVLImageProcessor(BaseMultimodalProcessor):
|
class InternVLImageProcessor(BaseMultimodalProcessor):
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM
|
|
||||||
|
|
||||||
|
|
||||||
class JanusProImageProcessor(BaseMultimodalProcessor):
|
class JanusProImageProcessor(BaseMultimodalProcessor):
|
||||||
@@ -3,14 +3,12 @@ from typing import Any, Dict, List, Optional, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor as SGLangBaseProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
MultimodalSpecialTokens,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.kimi_vl import KimiVLForConditionalGeneration
|
from sglang.srt.models.kimi_vl import KimiVLForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
|
BaseMultimodalProcessor as SGLangBaseProcessor,
|
||||||
|
)
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
|
||||||
|
|
||||||
|
|
||||||
# Compatible with KimiVLForConditionalGeneration
|
# Compatible with KimiVLForConditionalGeneration
|
||||||
@@ -7,11 +7,7 @@ from transformers.models.auto.processing_auto import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
import sglang.srt.managers.multimodal_processor as sgl_mm_processor_utils
|
import sglang.srt.managers.multimodal_processor as sgl_mm_processor_utils
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.mm_utils import expand2square, process_anyres_image
|
|
||||||
from sglang.srt.models.llava import (
|
from sglang.srt.models.llava import (
|
||||||
LlavaForConditionalGeneration,
|
LlavaForConditionalGeneration,
|
||||||
LlavaLlamaForCausalLM,
|
LlavaLlamaForCausalLM,
|
||||||
@@ -20,6 +16,8 @@ from sglang.srt.models.llava import (
|
|||||||
)
|
)
|
||||||
from sglang.srt.models.llavavid import LlavaVidForCausalLM
|
from sglang.srt.models.llavavid import LlavaVidForCausalLM
|
||||||
from sglang.srt.models.mistral import Mistral3ForConditionalGeneration
|
from sglang.srt.models.mistral import Mistral3ForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.mm_utils import expand2square, process_anyres_image
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
from sglang.srt.utils import load_image, logger
|
from sglang.srt.utils import load_image, logger
|
||||||
from sglang.utils import get_exception_traceback
|
from sglang.utils import get_exception_traceback
|
||||||
|
|
||||||
@@ -2,13 +2,13 @@ from typing import List, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor,
|
|
||||||
MultimodalSpecialTokens,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.minicpmo import MiniCPMO
|
from sglang.srt.models.minicpmo import MiniCPMO
|
||||||
from sglang.srt.models.minicpmv import MiniCPMV
|
from sglang.srt.models.minicpmv import MiniCPMV
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
|
BaseMultimodalProcessor,
|
||||||
|
MultimodalSpecialTokens,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Compatible with both 'O' and 'V'
|
# Compatible with both 'O' and 'V'
|
||||||
@@ -1,10 +1,8 @@
|
|||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.mllama import MllamaForConditionalGeneration
|
from sglang.srt.models.mllama import MllamaForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
from sglang.srt.utils import load_image
|
from sglang.srt.utils import load_image
|
||||||
|
|
||||||
|
|
||||||
@@ -7,12 +7,12 @@ from transformers.models.llama4.image_processing_llama4_fast import (
|
|||||||
get_best_fit,
|
get_best_fit,
|
||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.mllama4 import Llama4ForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.mllama4 import Llama4ForConditionalGeneration
|
|
||||||
|
|
||||||
|
|
||||||
class Mllama4ImageProcessor(BaseMultimodalProcessor):
|
class Mllama4ImageProcessor(BaseMultimodalProcessor):
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.phi4mm import Phi4MMForCausalLM
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.phi4mm import Phi4MMForCausalLM
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -6,12 +6,12 @@ from transformers.models.pixtral.image_processing_pixtral import (
|
|||||||
_num_image_tokens as _get_pixtral_hf_num_image_tokens,
|
_num_image_tokens as _get_pixtral_hf_num_image_tokens,
|
||||||
)
|
)
|
||||||
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.pixtral import PixtralVisionModel
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.pixtral import PixtralVisionModel
|
|
||||||
|
|
||||||
|
|
||||||
class PixtralProcessor(BaseMultimodalProcessor):
|
class PixtralProcessor(BaseMultimodalProcessor):
|
||||||
@@ -7,15 +7,13 @@ import torch
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
|
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor as SGLangBaseProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
MultimodalSpecialTokens,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
from sglang.srt.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
|
from sglang.srt.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
|
||||||
from sglang.srt.models.qwen2_vl import Qwen2VLForConditionalGeneration
|
from sglang.srt.models.qwen2_vl import Qwen2VLForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
|
BaseMultimodalProcessor as SGLangBaseProcessor,
|
||||||
|
)
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
|
||||||
|
|
||||||
|
|
||||||
# Compatible with Qwen2VL and Qwen2_5VL
|
# Compatible with Qwen2VL and Qwen2_5VL
|
||||||
@@ -10,12 +10,12 @@ from sglang.srt.managers.io_struct import (
|
|||||||
GenerateReqInput,
|
GenerateReqInput,
|
||||||
ImageDataItem,
|
ImageDataItem,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
||||||
|
from sglang.srt.models.vila import VILAForConditionalGeneration
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import (
|
||||||
BaseMultimodalProcessor,
|
BaseMultimodalProcessor,
|
||||||
MultimodalSpecialTokens,
|
MultimodalSpecialTokens,
|
||||||
)
|
)
|
||||||
from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem
|
|
||||||
from sglang.srt.models.vila import VILAForConditionalGeneration
|
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
|
||||||
|
|
||||||
@@ -2577,3 +2577,13 @@ def configure_gc_logger():
|
|||||||
)
|
)
|
||||||
|
|
||||||
gc.callbacks.append(gc_callback)
|
gc.callbacks.append(gc_callback)
|
||||||
|
|
||||||
|
|
||||||
|
# COPIED FROM DeepGEMM
|
||||||
|
def align(x: int, y: int) -> int:
|
||||||
|
return ceil_div(x, y) * y
|
||||||
|
|
||||||
|
|
||||||
|
# COPIED FROM DeepGEMM
|
||||||
|
def ceil_div(x: int, y: int) -> int:
|
||||||
|
return (x + y - 1) // y
|
||||||
|
|||||||
@@ -23,15 +23,13 @@ from sglang.srt.configs.model_config import ModelConfig
|
|||||||
from sglang.srt.conversation import generate_chat_conv
|
from sglang.srt.conversation import generate_chat_conv
|
||||||
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
|
||||||
from sglang.srt.managers.mm_utils import embed_mm_inputs, init_embedding_cache
|
from sglang.srt.managers.mm_utils import embed_mm_inputs, init_embedding_cache
|
||||||
from sglang.srt.managers.multimodal_processors.base_processor import (
|
|
||||||
BaseMultimodalProcessor,
|
|
||||||
)
|
|
||||||
from sglang.srt.managers.schedule_batch import (
|
from sglang.srt.managers.schedule_batch import (
|
||||||
Modality,
|
Modality,
|
||||||
MultimodalDataItem,
|
MultimodalDataItem,
|
||||||
MultimodalInputs,
|
MultimodalInputs,
|
||||||
)
|
)
|
||||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||||
|
from sglang.srt.multimodal.processors.base_processor import BaseMultimodalProcessor
|
||||||
from sglang.srt.server_args import ServerArgs
|
from sglang.srt.server_args import ServerArgs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user