[CPU] Fix TP padding issue on Phi-4 (#8289)
This commit is contained in:
@@ -129,6 +129,25 @@ def get_config(
|
||||
config = AutoConfig.from_pretrained(
|
||||
model, trust_remote_code=trust_remote_code, revision=revision, **kwargs
|
||||
)
|
||||
if (
|
||||
config.architectures is not None
|
||||
and config.architectures[0] == "Phi4MMForCausalLM"
|
||||
):
|
||||
# Phi4MMForCausalLM uses a hard-coded vision_config. See:
|
||||
# https://github.com/vllm-project/vllm/blob/6071e989df1531b59ef35568f83f7351afb0b51e/vllm/model_executor/models/phi4mm.py#L71
|
||||
# We set it here to support cases where num_attention_heads is not divisible by the TP size.
|
||||
from transformers import SiglipVisionConfig
|
||||
|
||||
vision_config = {
|
||||
"hidden_size": 1152,
|
||||
"image_size": 448,
|
||||
"intermediate_size": 4304,
|
||||
"model_type": "siglip_vision_model",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 26,  # The model originally has 27 layers; only the first 26 are needed for feature extraction.
|
||||
"patch_size": 14,
|
||||
}
|
||||
config.vision_config = SiglipVisionConfig(**vision_config)
|
||||
text_config = get_hf_text_config(config=config)
|
||||
|
||||
if isinstance(model, str) and text_config is not None:
|
||||
|
||||
Reference in New Issue
Block a user