[3/4] Speed up CSGMV backend perf by 10% through dynamic chunking + kernel optimization (#10592)
This commit is contained in:
@@ -19,6 +19,9 @@ class LoRABatchInfo:
|
||||
# Number of segments. For triton backend, it is equal to batch size.
|
||||
num_segments: int
|
||||
|
||||
# Maximum segment length of current batch
|
||||
max_len: int
|
||||
|
||||
# Index pointers of each segment in shape (num_segments + 1, )
|
||||
seg_indptr: torch.Tensor
|
||||
|
||||
@@ -34,9 +37,6 @@ class LoRABatchInfo:
|
||||
# Lengths of each segment in shape (num_segments,)
|
||||
seg_lens: Optional[torch.Tensor]
|
||||
|
||||
# Maximum segment length of current batch
|
||||
max_len: Optional[int]
|
||||
|
||||
# The logical (re)ordering of input rows (tokens), in shape (num_tokens,)
|
||||
permutation: Optional[torch.Tensor]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user