[Feature] Speculative decoding supports lookahead (#9873)

Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
Co-authored-by: Qiaolin-Yu <liin1211@outlook.com>
This commit is contained in:
Zhihao Zhang
2025-09-19 07:42:41 +08:00
committed by GitHub
parent 2a2ff9a840
commit e7bc600304
30 changed files with 2058 additions and 32 deletions

View File

@@ -291,6 +291,12 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"Tensor target_predict, int cuda_stream) -> ()");
// Register `verify_tree_greedy` as the implementation of the "verify_tree_greedy" op for the CUDA dispatch key.
m.impl("verify_tree_greedy", torch::kCUDA, &verify_tree_greedy);
// Declare the schema of the `reconstruct_indices_from_tree_mask` op: six Tensor arguments
// (tree_mask, verified_seq_len, positions, retrive_index, retrive_next_token,
// retrive_next_sibling) followed by two ints (batch_size, draft_token_num); it returns nothing.
// The "retrive" spelling is part of the schema's argument names, so it is runtime text, not a
// comment typo.
// NOTE(review): the op returns `()` yet none of its Tensor arguments carries the `Tensor!`
// mutation annotation, unlike the `build_tree_kernel_efficient` schema registered right after
// this one. Presumably the kernel writes its results into some of these tensors — TODO confirm
// against the kernel and, if so, consider marking those arguments `Tensor!`.
m.def(
"reconstruct_indices_from_tree_mask(Tensor tree_mask, Tensor verified_seq_len, Tensor positions, "
"Tensor retrive_index, Tensor retrive_next_token, Tensor retrive_next_sibling, "
"int batch_size, int draft_token_num) -> ()");
// Register the C++ function of the same name as this op's implementation for the CUDA dispatch key.
m.impl("reconstruct_indices_from_tree_mask", torch::kCUDA, &reconstruct_indices_from_tree_mask);
m.def(
"build_tree_kernel_efficient(Tensor parent_list, Tensor selected_index, Tensor verified_seq_len, "
"Tensor! tree_mask, Tensor! positions, Tensor! retrive_index, Tensor! retrive_next_token, "