[Feature] Speculative decoding support lookahead (#9873)
Co-authored-by: a4zhangfei <a4zhangfei@qq.com> Co-authored-by: Qiaolin-Yu <liin1211@outlook.com>
This commit is contained in:
@@ -457,6 +457,16 @@ void verify_tree_greedy(
|
||||
at::Tensor target_predict,
|
||||
int64_t cuda_stream = 0);
|
||||
|
||||
// Forward declaration; the definition is not part of this view.
// Inputs: `tree_mask` and `verified_seq_len`. The four tensors annotated
// "mutable" below (`positions`, `retrive_index`, `retrive_next_token`,
// `retrive_next_sibling`) are presumably filled in place as outputs, since the
// function returns void -- TODO confirm against the definition.
// NOTE(review): tensor shapes/dtypes and the exact roles of `batch_size` and
// `draft_token_num` cannot be determined from this declaration -- confirm with
// the kernel implementation and its callers.
void reconstruct_indices_from_tree_mask(
|
||||
at::Tensor tree_mask,
|
||||
at::Tensor verified_seq_len,
|
||||
at::Tensor positions, // mutable
|
||||
at::Tensor retrive_index, // mutable
|
||||
at::Tensor retrive_next_token, // mutable
|
||||
at::Tensor retrive_next_sibling, // mutable
|
||||
int64_t batch_size,
|
||||
int64_t draft_token_num);
|
||||
|
||||
void build_tree_kernel_efficient(
|
||||
at::Tensor parent_list,
|
||||
at::Tensor selected_index,
|
||||
|
||||
Reference in New Issue
Block a user