diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py index 1599cf26b..7377ab73b 100644 --- a/python/sglang/srt/layers/quantization/fp8_utils.py +++ b/python/sglang/srt/layers/quantization/fp8_utils.py @@ -2,6 +2,8 @@ from typing import List, Optional, Tuple import torch +from sglang.srt.layers.quantization.fp8_kernel import sglang_per_token_group_quant_fp8 + try: from vllm import _custom_ops as vllm_ops