[VLM] Adopt fast image processor by default (#5065)

This commit is contained in:
Mick
2025-04-12 12:46:58 +08:00
committed by GitHub
parent 611720919d
commit 34ef6c8135
12 changed files with 163 additions and 98 deletions

View File

@@ -462,6 +462,7 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module):
quant_config=quant_config,
prefix=add_prefix("lm_head", prefix),
)
self.is_mrope_enabled = "mrope_section" in self.config.rope_scaling
self.logits_processor = LogitsProcessor(config)
self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True)
@@ -515,15 +516,14 @@ class Qwen2_5_VLForConditionalGeneration(nn.Module):
otherwise it will be `(seq_len,)`.
(Use forward_batch.mrope_positions to replace it)
"""
is_mrope_enabled = "mrope_section" in self.config.rope_scaling
if is_mrope_enabled:
if self.is_mrope_enabled:
positions = forward_batch.mrope_positions
if not (
forward_batch.forward_mode.is_decode()
or not forward_batch.contains_image_inputs()
):
if is_mrope_enabled:
if self.is_mrope_enabled:
assert positions.ndim == 2 and positions.size(0) == 3, (
"multimodal section rotary embedding requires "
f"(3, seq_len) positions, but got {positions.size()}"