From aaf0ad8cdf3d945c1d6f326d2343c736e9571640 Mon Sep 17 00:00:00 2001 From: Stefan He Date: Thu, 7 Aug 2025 15:50:52 -0700 Subject: [PATCH] remove vllm fp8quant from fp8.py (#8937) --- python/sglang/srt/layers/quantization/fp8.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index 17e1b7868..956264fc9 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -98,9 +98,6 @@ if _is_hip and (_use_aiter or _use_hip_int4): from aiter.fused_moe import fused_moe from aiter.ops.shuffle import shuffle_weight -if not (_is_cuda or _is_npu or (_is_cpu and _is_cpu_amx_available) or _is_hip): - from vllm._custom_ops import scaled_fp8_quant - ACTIVATION_SCHEMES = ["static", "dynamic"]