fix(disagg): fix sending KV cache in case of MLA for NIXL backend (#10673)
This commit is contained in:
@@ -547,7 +547,7 @@ class NixlKVManager(CommonKVManager):
|
|||||||
notif = "_".join([str(req.room), "kv", str(chunk_id), str(int(is_last))])
|
notif = "_".join([str(req.room), "kv", str(chunk_id), str(int(is_last))])
|
||||||
decode_tp_size = self.decode_kv_args_table[req.agent_name].decode_tp_size
|
decode_tp_size = self.decode_kv_args_table[req.agent_name].decode_tp_size
|
||||||
|
|
||||||
if decode_tp_size == self.attn_tp_size:
|
if self.is_mla_backend or (decode_tp_size == self.attn_tp_size):
|
||||||
kv_xfer_handle = self.send_kvcache(
|
kv_xfer_handle = self.send_kvcache(
|
||||||
req.agent_name,
|
req.agent_name,
|
||||||
kv_indices,
|
kv_indices,
|
||||||
|
|||||||
Reference in New Issue
Block a user