Support penalty in overlap mode; return logprob with chunked prefill; improve benchmark scripts (#3988)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: dhou-xai <dhou@x.ai>
Co-authored-by: Hanming Lu <hanming_lu@berkeley.edu>
This commit is contained in:
@@ -111,6 +111,8 @@ else:
|
||||
"cublas_grouped_gemm",
|
||||
"custom_dispose",
|
||||
"custom_reduce",
|
||||
"build_tree_kernel_efficient",
|
||||
"build_tree_kernel",
|
||||
"fp8_blockwise_scaled_mm",
|
||||
"fp8_scaled_mm",
|
||||
"fused_add_rmsnorm",
|
||||
@@ -127,12 +129,10 @@ else:
|
||||
"register_graph_buffers",
|
||||
"rmsnorm",
|
||||
"sampling_scaling_penalties",
|
||||
"sgl_per_token_group_quant_fp8",
|
||||
"silu_and_mul",
|
||||
"top_k_renorm_prob",
|
||||
"top_k_top_p_sampling_from_probs",
|
||||
"top_p_renorm_prob",
|
||||
"tree_speculative_sampling_target_only",
|
||||
"build_tree_kernel_efficient",
|
||||
"build_tree_kernel",
|
||||
"sgl_per_token_group_quant_fp8",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user