[Bugfix] Fix Qwen3/DSV3/DSV3.2 model support (#11510)

This commit is contained in:
Even Zhou
2025-10-16 15:14:09 +08:00
committed by GitHub
parent b0d20cdec7
commit 3cceaa381a
12 changed files with 102 additions and 33 deletions

View File

@@ -1357,6 +1357,7 @@ class DeepseekV2AttentionMLA(nn.Module):
inner_state = self.mla_preprocess.forward(
positions, hidden_states, forward_batch, zero_allocator
)
inner_state = (*inner_state, None) # add a position for topk_indices
elif attn_forward_method == AttnForwardMethod.NPU_MLA_SPARSE:
inner_state = self.forward_npu_sparse_prepare(
positions, hidden_states, forward_batch, zero_allocator