Fix illegal memory access in overlap mode & Use more fused Triton kernels for building metadata (#2051)

This commit is contained in:
Lianmin Zheng
2024-11-16 16:14:23 -08:00
committed by GitHub
parent 976bc302e5
commit edad373135
7 changed files with 198 additions and 83 deletions

View File

@@ -73,7 +73,7 @@ class SamplingBatchInfo:
top_ks=top_ks,
min_ps=min_ps,
need_min_p_sampling=any(r.sampling_params.min_p > 0 for r in reqs),
-is_all_greedy=top_ks.max().item() <= 1,
+is_all_greedy=all(r.sampling_params.top_k <= 1 for r in reqs),
vocab_size=vocab_size,
device=device,
)