vlm: enable radix cache for qwen-vl models (#5349)

Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
This commit is contained in:
Mick
2025-04-24 12:35:05 +09:00
committed by GitHub
parent 7d0edf3cae
commit c998d04b46
26 changed files with 429 additions and 331 deletions

View File

@@ -73,15 +73,14 @@ class ModelConfig:
 )
 if enable_multimodal is None:
-if self.hf_config.architectures[0] == "Llama4ForConditionalGeneration":
+mm_disabled_models = [
+"Gemma3ForConditionalGeneration",
+"Llama4ForConditionalGeneration",
+]
+if self.hf_config.architectures[0] in mm_disabled_models:
 enable_multimodal = False
 logger.info(
-"Multimodal is disabled for Llama4. To enable it, set --enable-llama4-multimodal."
-)
-elif self.hf_config.architectures[0] == "Gemma3ForConditionalGeneration":
-enable_multimodal = False
-logger.info(
-"Multimodal is disabled for Gemma3. To enable it, set --enable-gemma3-multimodal."
+f"Multimodal is disabled for {self.hf_config.model_type}. To enable it, set --enable-multimodal."
 )
 else:
 enable_multimodal = True