From cd641a995c7423acfcb483d5c208d52d8c5e49f5 Mon Sep 17 00:00:00 2001
From: GuoweiWangU
Date: Wed, 24 Sep 2025 17:29:32 +0800
Subject: [PATCH] fix bailing_moe with enable_dp_attention (#10860)

---
 python/sglang/srt/models/bailing_moe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/models/bailing_moe.py b/python/sglang/srt/models/bailing_moe.py
index 0797f4f6f..2d1929ead 100644
--- a/python/sglang/srt/models/bailing_moe.py
+++ b/python/sglang/srt/models/bailing_moe.py
@@ -45,12 +45,12 @@ from sglang.srt.layers.dp_attention import (
     get_attention_dp_size,
     get_attention_tp_rank,
     get_attention_tp_size,
+    is_dp_attention_enabled,
 )
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
-    ReplicatedLinear,
     RowParallelLinear,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -702,7 +702,7 @@ class BailingMoEModel(nn.Module):
                 self.embed_dim,
                 quant_config=quant_config,
                 prefix=add_prefix("word_embeddings", prefix),
-                use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
+                enable_tp=not is_dp_attention_enabled(),
             )
         else:
             self.word_embeddings = PPMissingLayer()
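
Note (review commentary, not part of the patch): the fix switches the input
embedding's parallelism from the lm-head-specific
global_server_args_dict["enable_dp_lm_head"] flag to a direct DP-attention
check, and drops the ReplicatedLinear import that this file no longer uses.
With enable_dp_attention, each data-parallel rank runs attention over its own
batch and needs the full vocabulary table locally, so the embedding should be
replicated rather than sharded across tensor-parallel ranks. A minimal sketch
of the resulting pattern, assuming an installed sglang with the imports shown
(the helper name and its parameters are invented for illustration):

    from sglang.srt.layers.dp_attention import is_dp_attention_enabled
    from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding

    def build_word_embeddings(vocab_size: int, embed_dim: int) -> VocabParallelEmbedding:
        # When DP attention is enabled, disable tensor-parallel sharding so
        # every rank holds the complete embedding table; otherwise keep the
        # usual vocab-parallel sharding.
        return VocabParallelEmbedding(
            vocab_size,
            embed_dim,
            enable_tp=not is_dp_attention_enabled(),
        )

Several other sglang model files construct their token embeddings with this
same enable_tp=not is_dp_attention_enabled() guard, which is presumably why
the bailing_moe path is brought in line with them here.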