Fuse quantize and rope in trtllm_mla MTP (#10779)

This commit is contained in:
fzyzcjy
2025-10-02 17:59:37 +08:00
committed by GitHub
parent d61615fe93
commit f35def8652
2 changed files with 37 additions and 5 deletions

View File

@@ -1399,7 +1399,10 @@ class DeepseekV2AttentionMLA(nn.Module):
"""
return (
self.current_attention_backend == "trtllm_mla"
- and forward_batch.forward_mode.is_decode_or_idle()
+ and (
+ forward_batch.forward_mode.is_decode_or_idle()
+ or forward_batch.forward_mode.is_target_verify()
+ )
and forward_batch.attn_backend.data_type == torch.float8_e4m3fn
)