Add DeepSeek-V3 and Llama 4 support

This commit is contained in:
Chranos
2026-02-11 14:32:37 +08:00
parent 96ed925486
commit 153bc4ec7b

View File

@@ -252,19 +252,27 @@ def forward_prefill(
updated_slot_mapping = attn_metadata.slot_mapping
if self.attn.kv_cache_dtype == 'int8':
key_cache_scale = kv_cache[1][0]
mlu_ops.quant_to_paged_cache(key_value,
mlu_ops.quant_to_linear_cache(key_value,
None,
key_cache,
None,
key_cache_scale,
None,
attn_metadata.cu_seq_lens,
attn_metadata.max_seq_len,
True, None,
attn_metadata.batch_ids,
attn_metadata.slot_mapping_unpaged)
else:
mlu_ops.reshape_linear_cache(key_value,
None,
key_cache,
None,
key_cache_scale,
None,
updated_slot_mapping.flatten())
else:
mlu_ops.reshape_paged_cache(key_value,
None,
key_cache,
None,
updated_slot_mapping.flatten())
attn_metadata.cu_seq_lens,
attn_metadata.max_seq_len,
True, None,
attn_metadata.batch_ids,
attn_metadata.slot_mapping_unpaged)
'''
==================
End of MLU Hijack