Optimize topk operation in llama4 (#5128)

This commit is contained in:
fzyzcjy
2025-04-09 17:50:22 +08:00
committed by GitHub
parent 92823069c4
commit 86a876d883
4 changed files with 18 additions and 15 deletions

View File

@@ -31,11 +31,15 @@ from sglang.srt.speculative.eagle_utils import (
     EagleVerifyInput,
     EagleVerifyOutput,
     assign_draft_cache_locs,
-    fast_topk,
     select_top_k_tokens,
 )
 from sglang.srt.speculative.spec_info import SpeculativeAlgorithm
-from sglang.srt.utils import empty_context, get_available_gpu_memory, is_cuda_available
+from sglang.srt.utils import (
+    empty_context,
+    fast_topk,
+    get_available_gpu_memory,
+    is_cuda_available,
+)
 
 if is_cuda_available():
     from sgl_kernel import segment_packbits