From 8abe8deae6cdcfb0ea5f3c7ced376459594fc48e Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Fri, 29 Aug 2025 23:24:14 -0700
Subject: [PATCH] fix: dsv3 lite q_lora_rank none (#9815)

---
 python/sglang/srt/models/deepseek_v2.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 30df6afcd..6058488a1 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -2414,18 +2414,26 @@ class DeepseekV2ForCausalLM(nn.Module):
         )
 
         num_hidden_layers = 1 if is_nextn else self.config.num_hidden_layers
+
         for layer_id in range(num_hidden_layers):
             if is_nextn:
                 layer = self.model.decoder
             else:
                 layer = self.model.layers[layer_id]
 
-            for module in [
-                layer.self_attn.fused_qkv_a_proj_with_mqa,
-                layer.self_attn.q_b_proj,
+            module_list = [
                 layer.self_attn.kv_b_proj,
                 layer.self_attn.o_proj,
-            ]:
+            ]
+
+            if self.config.q_lora_rank is not None:
+                module_list.append(layer.self_attn.fused_qkv_a_proj_with_mqa)
+                module_list.append(layer.self_attn.q_b_proj)
+            else:
+                module_list.append(layer.self_attn.kv_a_proj_with_mqa)
+                module_list.append(layer.self_attn.q_proj)
+
+            for module in module_list:
                 requant_weight_ue8m0_inplace(
                     module.weight, module.weight_scale_inv, weight_block_size
                 )
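
Note (not part of the patch): the sketch below restates the module-selection
logic from the hunk in standalone form, for readers skimming the diff. The
SelfAttn dataclass and the modules_to_requant helper are illustrative
stand-ins, not sglang APIs; in the real model the attributes are quantized
linear layers carrying .weight and .weight_scale_inv, not strings.

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class SelfAttn:
        # Stand-ins for the projection modules named in the diff.
        fused_qkv_a_proj_with_mqa: str = "fused_qkv_a_proj_with_mqa"
        q_b_proj: str = "q_b_proj"
        kv_a_proj_with_mqa: str = "kv_a_proj_with_mqa"
        q_proj: str = "q_proj"
        kv_b_proj: str = "kv_b_proj"
        o_proj: str = "o_proj"

    def modules_to_requant(attn: SelfAttn, q_lora_rank: Optional[int]) -> List[str]:
        # Mirror the patched logic: fused_qkv_a_proj_with_mqa and q_b_proj only
        # exist when q_lora_rank is set. DSv3 lite configs leave it None, so
        # those layers expose kv_a_proj_with_mqa and a plain q_proj instead.
        modules = [attn.kv_b_proj, attn.o_proj]
        if q_lora_rank is not None:
            modules += [attn.fused_qkv_a_proj_with_mqa, attn.q_b_proj]
        else:
            modules += [attn.kv_a_proj_with_mqa, attn.q_proj]
        return modules

    if __name__ == "__main__":
        attn = SelfAttn()
        # q_lora_rank=None is the case this patch fixes; 1536 is only an
        # illustrative non-None value.
        print(modules_to_requant(attn, q_lora_rank=None))
        print(modules_to_requant(attn, q_lora_rank=1536))

Before this change, the loop always touched fused_qkv_a_proj_with_mqa and
q_b_proj, which do not exist when q_lora_rank is None.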