[CPU] Fix TP padding issue on Phi-4 (#8289)

This commit is contained in:
blzheng
2025-08-18 07:25:26 +08:00
committed by GitHub
parent b341b7dbce
commit ebbb75e917
5 changed files with 93 additions and 27 deletions

View File

@@ -129,6 +129,25 @@ def get_config(
# NOTE(review): fragment of get_config — loads the HF AutoConfig, then
# patches in a hard-coded Phi-4-multimodal vision_config before the text
# config is extracted. Surrounding function body is outside this view.
config = AutoConfig.from_pretrained(
model, trust_remote_code=trust_remote_code, revision=revision, **kwargs
)
# Only the FIRST listed architecture is checked; presumably Phi4MM
# checkpoints always list it first — TODO confirm against upstream configs.
if (
config.architectures is not None
and config.architectures[0] == "Phi4MMForCausalLM"
):
# Phi4MMForCausalLM uses a hard-coded vision_config. See:
# https://github.com/vllm-project/vllm/blob/6071e989df1531b59ef35568f83f7351afb0b51e/vllm/model_executor/models/phi4mm.py#L71
# We set it here to support cases where num_attention_heads is not divisible by the TP size.
# Lazy import: keeps SiglipVisionConfig off the hot import path for
# non-Phi4MM models.
from transformers import SiglipVisionConfig
# SigLIP vision-tower hyperparameters mirrored from vLLM's phi4mm
# model definition (link above); overrides whatever the checkpoint has.
vision_config = {
"hidden_size": 1152,
"image_size": 448,
"intermediate_size": 4304,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_hidden_layers": 26, # Model is originally 27-layer, we only need the first 26 layers for feature extraction.
"patch_size": 14,
}
config.vision_config = SiglipVisionConfig(**vision_config)
# Extract the text-only sub-config (multimodal configs nest it);
# continuation of this branch is outside the visible region.
text_config = get_hf_text_config(config=config)
if isinstance(model, str) and text_config is not None: