From aaf0ad8cdf3d945c1d6f326d2343c736e9571640 Mon Sep 17 00:00:00 2001
From: Stefan He <hebiaobuaa@gmail.com>
Date: Thu, 7 Aug 2025 15:50:52 -0700
Subject: [PATCH] Remove conditional vllm scaled_fp8_quant import from fp8.py (#8937)

---
 python/sglang/srt/layers/quantization/fp8.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py
index 17e1b7868..956264fc9 100644
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -98,9 +98,6 @@ if _is_hip and (_use_aiter or _use_hip_int4):
     from aiter.fused_moe import fused_moe
     from aiter.ops.shuffle import shuffle_weight
 
-if not (_is_cuda or _is_npu or (_is_cpu and _is_cpu_amx_available) or _is_hip):
-    from vllm._custom_ops import scaled_fp8_quant
-
 
 ACTIVATION_SCHEMES = ["static", "dynamic"]