From 1fd0cf8a7b9672c74f5740d66cb5a6a3cb7016f9 Mon Sep 17 00:00:00 2001 From: Michael Feil <63565275+michaelfeil@users.noreply.github.com> Date: Sat, 15 Mar 2025 21:14:29 -0700 Subject: [PATCH] Update comment in qwen2_moe.py (#4447) --- python/sglang/srt/models/qwen2_moe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index b5ef472ce..5c3c8d612 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -170,6 +170,7 @@ class Qwen2MoeAttention(nn.Module): rope_theta: float = 10000, rope_scaling: Optional[Dict[str, Any]] = None, max_position_embeddings: int = 8192, + qkv_bias: bool = True, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", ) -> None: @@ -201,7 +202,7 @@ class Qwen2MoeAttention(nn.Module): self.head_dim, self.total_num_heads, self.total_num_kv_heads, - bias=True, + bias=qkv_bias, quant_config=quant_config, prefix=add_prefix("qkv_proj", prefix), ) @@ -257,6 +258,8 @@ class Qwen2MoeDecoderLayer(nn.Module): rope_theta = getattr(config, "rope_theta", 10000) rope_scaling = getattr(config, "rope_scaling", None) max_position_embeddings = getattr(config, "max_position_embeddings", 8192) + # note: replace config.num_hidden_layers < 80 with True once it's available in transformers 4.50.0 + qkv_bias = getattr(config, "qkv_bias", config.num_hidden_layers < 80) self.self_attn = Qwen2MoeAttention( hidden_size=self.hidden_size, num_heads=config.num_attention_heads, @@ -266,6 +269,7 @@ class Qwen2MoeDecoderLayer(nn.Module): rope_scaling=rope_scaling, max_position_embeddings=max_position_embeddings, quant_config=quant_config, + qkv_bias=qkv_bias, prefix=add_prefix("self_attn", prefix), )