chore: bump transformers to 4.54.0 (#8416)

Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
Co-authored-by: Lifu Huang <lifu.hlf@gmail.com>
This commit is contained in:
Stefan He
2025-07-27 21:27:25 -07:00
committed by GitHub
parent 2810338401
commit 4ad9737045
7 changed files with 62 additions and 52 deletions

View File

@@ -45,7 +45,7 @@ runtime_common = [
"soundfile==0.13.1",
"scipy",
"torchao==0.9.0",
"transformers==4.53.2",
"transformers==4.54.0",
"timm==1.0.16",
"uvicorn",
"uvloop",

View File

@@ -656,11 +656,15 @@ class LlavaForConditionalGeneration(LlavaBaseForCausalLM):
self, auto_model_type: Type[AutoModel]
) -> Dict[str, str]:
mapping = {}
for config_cls, archs in auto_model_type._model_mapping.items():
if isinstance(archs, tuple):
mapping[config_cls.__name__] = tuple(arch.__name__ for arch in archs)
else:
mapping[config_cls.__name__] = archs.__name__
for config_cls in auto_model_type._model_mapping.keys():
archs = auto_model_type._model_mapping.get(config_cls, None)
if archs is not None:
if isinstance(archs, tuple):
mapping[config_cls.__name__] = tuple(
arch.__name__ for arch in archs
)
else:
mapping[config_cls.__name__] = archs.__name__
return mapping
def __init__(

View File

@@ -1134,7 +1134,10 @@ class MiniCPMWhisperEncoderLayer(nn.Module):
"""
residual = hidden_states
hidden_states = self.self_attn_layer_norm(hidden_states)
hidden_states, attn_weights, past_key_values = self.self_attn(
# TODO (lifuhuang): confirmed with Mick that the logic for past_key_values is copied from minicpmo official code,
# currently we are not using past_key_values at all. We need to redesign the caching logic when we support streaming
# in the future.
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
layer_head_mask=layer_head_mask,