chore: bump transformers to 4.54.0 (#8416)

Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
Co-authored-by: Lifu Huang <lifu.hlf@gmail.com>
This commit is contained in:
Stefan He
2025-07-27 21:27:25 -07:00
committed by GitHub
parent 2810338401
commit 4ad9737045
7 changed files with 62 additions and 52 deletions

View File

@@ -45,7 +45,7 @@ runtime_common = [
"soundfile==0.13.1",
"scipy",
"torchao==0.9.0",
"transformers==4.53.2",
"transformers==4.54.0",
"timm==1.0.16",
"uvicorn",
"uvloop",

View File

@@ -656,11 +656,15 @@ class LlavaForConditionalGeneration(LlavaBaseForCausalLM):
self, auto_model_type: Type[AutoModel]
) -> Dict[str, str]:
mapping = {}
for config_cls, archs in auto_model_type._model_mapping.items():
if isinstance(archs, tuple):
mapping[config_cls.__name__] = tuple(arch.__name__ for arch in archs)
else:
mapping[config_cls.__name__] = archs.__name__
for config_cls in auto_model_type._model_mapping.keys():
archs = auto_model_type._model_mapping.get(config_cls, None)
if archs is not None:
if isinstance(archs, tuple):
mapping[config_cls.__name__] = tuple(
arch.__name__ for arch in archs
)
else:
mapping[config_cls.__name__] = archs.__name__
return mapping
def __init__(

View File

@@ -1134,7 +1134,10 @@ class MiniCPMWhisperEncoderLayer(nn.Module):
"""
residual = hidden_states
hidden_states = self.self_attn_layer_norm(hidden_states)
hidden_states, attn_weights, past_key_values = self.self_attn(
# TODO (lifuhuang): confirmed with Mick that the logic for past_key_values is copied from minicpmo official code,
# currently we are not using past_key_values at all. We need to redesign the caching logic when we support streaming
# in the future.
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
layer_head_mask=layer_head_mask,