[Feat] QWen-1M context support[2/2]: Update block sparse attention backend (#5949)

This commit is contained in:
PGFLMG
2025-08-07 14:49:36 +08:00
committed by GitHub
parent a69b637014
commit b7cd743038
15 changed files with 2121 additions and 4 deletions

View File

@@ -76,6 +76,9 @@ class ScheduleBatchDisaggregationDecodeMixin:
req_pool_indices, dtype=torch.int64, device=self.device
)
self.seq_lens = torch.tensor(seq_lens, dtype=torch.int64, device=self.device)
self.orig_seq_lens = torch.tensor(
seq_lens, dtype=torch.int32, device=self.device
)
self.out_cache_loc = out_cache_loc
self.seq_lens_sum = sum(seq_lens)