[Bugfix] Fix Qwen3/DSV3/DSV3.2 model support (#11510)
This commit is contained in:
@@ -1357,6 +1357,7 @@ class DeepseekV2AttentionMLA(nn.Module):
|
||||
inner_state = self.mla_preprocess.forward(
|
||||
positions, hidden_states, forward_batch, zero_allocator
|
||||
)
|
||||
inner_state = (*inner_state, None) # add a position for topk_indices
|
||||
elif attn_forward_method == AttnForwardMethod.NPU_MLA_SPARSE:
|
||||
inner_state = self.forward_npu_sparse_prepare(
|
||||
positions, hidden_states, forward_batch, zero_allocator
|
||||
|
||||
Reference in New Issue
Block a user