[7/n] decouple quantization impl from vllm dependency - gguf kernel (#11019)

This commit is contained in:
PGFLMG
2025-10-12 05:04:57 +08:00
committed by GitHub
parent b5dcfd4154
commit 8fdcd98efe
19 changed files with 7936 additions and 1 deletion

View File

@@ -288,10 +288,19 @@ from sgl_kernel.moe import (
fp8_blockwise_scaled_grouped_mm,
moe_align_block_size,
moe_fused_gate,
moe_sum,
moe_sum_reduce,
prepare_moe_input,
topk_softmax,
)
from sgl_kernel.quantization import (
ggml_dequantize,
ggml_moe_a8,
ggml_moe_a8_vec,
ggml_moe_get_block_size,
ggml_mul_mat_a8,
ggml_mul_mat_vec_a8,
)
from sgl_kernel.sampling import (
min_p_sampling_from_probs,
top_k_mask_logits,