[VLM] Support chunk prefill for VLM (#6355)
Co-authored-by: yizhang2077 <1109276519@qq.com>
This commit is contained in:
@@ -116,6 +116,10 @@ class ModelConfig:
|
||||
self.is_audio_model = enable_multimodal and is_audio_model(
|
||||
self.hf_config.architectures
|
||||
)
|
||||
self.is_multimodal_chunked_prefill_supported = (
|
||||
enable_multimodal
|
||||
and is_multimodal_chunked_prefill_supported(self.hf_config.architectures)
|
||||
)
|
||||
self.is_encoder_decoder = is_encoder_decoder_model(self.hf_config.architectures)
|
||||
self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
|
||||
|
||||
@@ -574,6 +578,21 @@ def is_encoder_decoder_model(model_architectures: List[str]):
|
||||
return "MllamaForConditionalGeneration" in model_architectures
|
||||
|
||||
|
||||
def is_multimodal_chunked_prefill_supported(model_architectures: List[str]) -> bool:
    """Check if chunked prefill is supported for a multimodal model.

    Args:
        model_architectures: Architecture class names, as found in
            ``hf_config.architectures``.

    Returns:
        ``False`` if any listed architecture is known not to support
        chunked prefill, ``True`` otherwise (including for an empty list).
    """
    # Multimodal architectures whose implementations cannot handle chunked
    # prefill. A set gives O(1) membership tests (the original used a list).
    unsupported = {
        "Grok1VForCausalLM",
        "Grok1AForCausalLM",
        "LlavaLlamaForCausalLM",
        "MllamaForConditionalGeneration",
        "CLIPModel",
    }
    # Supported iff none of the model's architectures is in the deny-list.
    # (Replaces the redundant `if any(...): return False else: return True`.)
    return not any(arch in unsupported for arch in model_architectures)
|
||||
|
||||
|
||||
def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
|
||||
if scale <= 1:
|
||||
return 1.0
|
||||
|
||||
Reference in New Issue
Block a user