refactor apply_w8a8_block_fp8_linear in fp (#6545)
This commit is contained in:
@@ -10,7 +10,9 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||
w8a8_block_fp8_matmul as vllm_w8a8_block_fp8_matmul,
|
||||
)
|
||||
|
||||
from sglang.srt.layers.quantization.fp8_kernel import w8a8_block_fp8_matmul
|
||||
from sglang.srt.layers.quantization.fp8_kernel import (
|
||||
w8a8_block_fp8_matmul_deepgemm as w8a8_block_fp8_matmul,
|
||||
)
|
||||
|
||||
|
||||
# Adapted from https://github.com/tile-ai/tilelang/blob/a8cfdce92795cb861c9033573534653ee040b5ed/examples/deepseek_deepgemm/example_deepgemm_fp8_2xAcc.py#L1
|
||||
|
||||
Reference in New Issue
Block a user