From a7028ae481af2b198d3d946afa14f20e7a1ab33d Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Tue, 10 Feb 2026 14:15:33 +0800
Subject: [PATCH] add gemma3

---
 vllm-v0.6.2/vllm/model_executor/models/gemma3.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/vllm-v0.6.2/vllm/model_executor/models/gemma3.py b/vllm-v0.6.2/vllm/model_executor/models/gemma3.py
index 2c701ae..f6fe766 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/gemma3.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/gemma3.py
@@ -177,14 +177,17 @@ class Gemma3Attention(nn.Module):
                 is_neox_style=True,
             )
         else:
-            # Global attention uses rope_scaling from config
-            rope_scaling = getattr(config, "rope_scaling", None)
+            # Global attention: extract rope_base and rope_scaling.
+            # Prioritize rope_parameters dict (newer transformers) to
+            # avoid passing nested dicts that are unhashable.
+            rope_scaling = None
             rope_base = self.rope_theta
-            if rope_scaling is None and isinstance(rope_params, dict):
-                # Try to extract from rope_parameters (newer transformers)
+            if isinstance(rope_params, dict):
+                # Transformers v5: per layer_type sub-dicts
                 if "full_attention" in rope_params:
                     rp = rope_params["full_attention"]
                 else:
+                    # Transformers v4: flat dict
                     rp = rope_params
                 rope_base = rp.get("rope_theta", self.rope_theta)
                 rtype = rp.get("rope_type", None)
@@ -193,8 +196,9 @@ class Gemma3Attention(nn.Module):
                         k: v
                         for k, v in rp.items() if k not in ("rope_theta",)
                     }
-                    rope_scaling["type"] = rope_scaling.pop("rope_type",
-                                                            rtype)
+            else:
+                # Fallback: old-style config.rope_scaling (flat dict)
+                rope_scaling = getattr(config, "rope_scaling", None)
             self.rotary_emb = get_rope(
                 self.head_dim,
                 rotary_dim=self.head_dim,
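
Reviewer note: the snippet below is a minimal standalone sketch of the resolution
logic this patch introduces for the global-attention branch, kept here only to make
the intended behavior easy to check. The config shapes (a nested rope_parameters
dict keyed by layer type versus a flat rope_scaling attribute) and the example
values are assumptions for illustration, not taken from a real checkpoint.

# sketch_resolve_rope.py -- illustration only, not part of the patch
from types import SimpleNamespace
from typing import Any, Optional, Tuple


def resolve_global_rope(config: Any,
                        default_theta: float) -> Tuple[float, Optional[dict]]:
    """Return (rope_base, rope_scaling) for Gemma3 global-attention layers."""
    rope_params = getattr(config, "rope_parameters", None)
    rope_scaling: Optional[dict] = None
    rope_base = default_theta

    if isinstance(rope_params, dict):
        # Newer transformers: per layer_type sub-dicts such as "full_attention".
        rp = rope_params.get("full_attention", rope_params)
        rope_base = rp.get("rope_theta", default_theta)
        rtype = rp.get("rope_type", None)
        if rtype is not None:
            # Keep only scaling-related keys; rope_theta is passed separately.
            rope_scaling = {k: v for k, v in rp.items() if k != "rope_theta"}
    else:
        # Older transformers: flat rope_scaling dict directly on the config.
        rope_scaling = getattr(config, "rope_scaling", None)

    return rope_base, rope_scaling


# Hypothetical configs exercising both shapes:
new_style = SimpleNamespace(rope_parameters={
    "full_attention": {"rope_theta": 1_000_000, "rope_type": "linear",
                       "factor": 8.0},
    "sliding_attention": {"rope_theta": 10_000},
})
old_style = SimpleNamespace(rope_scaling={"rope_type": "linear", "factor": 8.0})

print(resolve_global_rope(new_style, 10_000))
# -> (1000000, {'rope_type': 'linear', 'factor': 8.0})
print(resolve_global_rope(old_style, 10_000))
# -> (10000, {'rope_type': 'linear', 'factor': 8.0})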