model: adapt mllama4 to VisionAttention (#8512)
Co-authored-by: root <mickjagger19@icloud.com>
This commit is contained in:
@@ -70,7 +70,6 @@ from sglang.srt.managers.io_struct import (
|
||||
BatchMultimodalOut,
|
||||
BatchStrOut,
|
||||
BatchTokenIDOut,
|
||||
BlockReqType,
|
||||
CloseSessionReqInput,
|
||||
ConfigureLoggingReq,
|
||||
EmbeddingReqInput,
|
||||
@@ -202,13 +201,29 @@ class TokenizerManager:
|
||||
|
||||
if self.model_config.is_multimodal:
|
||||
import_processors()
|
||||
_processor = get_processor(
|
||||
server_args.tokenizer_path,
|
||||
tokenizer_mode=server_args.tokenizer_mode,
|
||||
trust_remote_code=server_args.trust_remote_code,
|
||||
revision=server_args.revision,
|
||||
use_fast=not server_args.disable_fast_image_processor,
|
||||
)
|
||||
try:
|
||||
_processor = get_processor(
|
||||
server_args.tokenizer_path,
|
||||
tokenizer_mode=server_args.tokenizer_mode,
|
||||
trust_remote_code=server_args.trust_remote_code,
|
||||
revision=server_args.revision,
|
||||
use_fast=not server_args.disable_fast_image_processor,
|
||||
)
|
||||
except ValueError as e:
|
||||
error_message = str(e)
|
||||
if "does not have a slow version" in error_message:
|
||||
logger.info(
|
||||
f"Processor {server_args.tokenizer_path} does not have a slow version. Automatically use fast version"
|
||||
)
|
||||
_processor = get_processor(
|
||||
server_args.tokenizer_path,
|
||||
tokenizer_mode=server_args.tokenizer_mode,
|
||||
trust_remote_code=server_args.trust_remote_code,
|
||||
revision=server_args.revision,
|
||||
use_fast=True,
|
||||
)
|
||||
else:
|
||||
raise e
|
||||
transport_mode = _determine_tensor_transport_mode(self.server_args)
|
||||
|
||||
# We want to parallelize the image pre-processing so we create an executor for it
|
||||
|
||||
Reference in New Issue
Block a user