[Feat] QWen-1M context support[2/2]: Update block sparse attention backend (#5949)
This commit is contained in:
@@ -76,6 +76,9 @@ class ScheduleBatchDisaggregationDecodeMixin:
    req_pool_indices, dtype=torch.int64, device=self.device
)
self.seq_lens = torch.tensor(seq_lens, dtype=torch.int64, device=self.device)
self.orig_seq_lens = torch.tensor(
    seq_lens, dtype=torch.int32, device=self.device
)
self.out_cache_loc = out_cache_loc
self.seq_lens_sum = sum(seq_lens)

Reference in New Issue
Block a user