[fix] missing prefix_lens_cpu init when using p/d disaggregation (#11196)
This commit is contained in:
@@ -523,11 +523,19 @@ class DecodePreallocQueue:
|
|||||||
dtype=torch.int64,
|
dtype=torch.int64,
|
||||||
device=self.token_to_kv_pool_allocator.device,
|
device=self.token_to_kv_pool_allocator.device,
|
||||||
),
|
),
|
||||||
|
prefix_lens_cpu=torch.tensor(
|
||||||
|
[0],
|
||||||
|
dtype=torch.int64,
|
||||||
|
),
|
||||||
seq_lens=torch.tensor(
|
seq_lens=torch.tensor(
|
||||||
[num_tokens],
|
[num_tokens],
|
||||||
dtype=torch.int64,
|
dtype=torch.int64,
|
||||||
device=self.token_to_kv_pool_allocator.device,
|
device=self.token_to_kv_pool_allocator.device,
|
||||||
),
|
),
|
||||||
|
seq_lens_cpu=torch.tensor(
|
||||||
|
[num_tokens],
|
||||||
|
dtype=torch.int64,
|
||||||
|
),
|
||||||
last_loc=torch.tensor(
|
last_loc=torch.tensor(
|
||||||
[-1],
|
[-1],
|
||||||
dtype=torch.int64,
|
dtype=torch.int64,
|
||||||
|
|||||||
Reference in New Issue
Block a user