Refactor attention backend (#1381)
This commit is contained in:
@@ -96,23 +96,17 @@ class TestExtendAttention(unittest.TestCase):
|
||||
v_buffer,
|
||||
req_to_tokens,
|
||||
b_req_idx,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
b_seq_len_prefix,
|
||||
b_start_loc_extend,
|
||||
b_seq_len_extend,
|
||||
max_len_in_batch,
|
||||
b_start_loc_extend,
|
||||
max_len_extend,
|
||||
)
|
||||
|
||||
redundant_attention(
|
||||
q_extend,
|
||||
k_extend,
|
||||
v_extend,
|
||||
o_redundant,
|
||||
k_buffer,
|
||||
v_buffer,
|
||||
req_to_tokens,
|
||||
b_req_idx,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
|
||||
Reference in New Issue
Block a user