Fix sampling for speculative decoding & simplify kernels (#7207)
This commit is contained in:
@@ -72,6 +72,7 @@ from sgl_kernel.speculative import (
|
||||
tree_speculative_sampling_target_only,
|
||||
verify_tree_greedy,
|
||||
)
|
||||
from sgl_kernel.top_k import fast_topk
|
||||
from sgl_kernel.version import __version__
|
||||
|
||||
build_tree_kernel = (
|
||||
|
||||
Reference in New Issue
Block a user