Support Mooncake store with DP attention (#9684)

This commit is contained in:
huangtingwei
2025-08-28 12:31:31 +08:00
committed by GitHub
parent e1f7cf57dc
commit 55349e361d
2 changed files with 4 additions and 4 deletions

View File

@@ -7,7 +7,6 @@ from functools import wraps
import psutil
import torch
from sglang.srt.distributed import get_tensor_model_parallel_rank
from sglang.srt.mem_cache.memory_pool import KVCache, MHATokenToKVPool, MLATokenToKVPool
from sglang.srt.utils import is_npu
@@ -464,8 +463,7 @@ class MHATokenToKVPoolHost(HostKVCache):
else:
raise ValueError(f"Unsupported layout: {self.layout}")
def get_buffer_meta(self, keys, indices):
local_rank = get_tensor_model_parallel_rank()
def get_buffer_meta(self, keys, indices, local_rank):
ptr_list = []
key_list = []
kv_buffer_data_ptr = self.kv_buffer.data_ptr()
@@ -704,7 +702,7 @@ class MLATokenToKVPoolHost(HostKVCache):
else:
raise ValueError(f"Unsupported layout: {self.layout}")
def get_buffer_meta(self, keys, indices):
def get_buffer_meta(self, keys, indices, local_rank):
ptr_list = []
key_list = []
kv_buffer_data_ptr = self.kv_buffer.data_ptr()