refactor apply_w8a8_block_fp8_linear in fp (#6545)

This commit is contained in:
ChangyiYang
2025-05-29 00:15:11 -07:00
committed by GitHub
parent 7e41290082
commit 485a023bd8
5 changed files with 283 additions and 120 deletions

View File

@@ -10,7 +10,9 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
w8a8_block_fp8_matmul as vllm_w8a8_block_fp8_matmul,
)
-from sglang.srt.layers.quantization.fp8_kernel import w8a8_block_fp8_matmul
+from sglang.srt.layers.quantization.fp8_kernel import (
+    w8a8_block_fp8_matmul_deepgemm as w8a8_block_fp8_matmul,
+)
# Adapted from https://github.com/tile-ai/tilelang/blob/a8cfdce92795cb861c9033573534653ee040b5ed/examples/deepseek_deepgemm/example_deepgemm_fp8_2xAcc.py#L1