[refactor] slightly tidy fp8 module (#5993)

This commit is contained in:
JieXin Liang
2025-05-08 08:28:24 +08:00
committed by GitHub
parent e444c13fb4
commit b70957fcf8
12 changed files with 238 additions and 231 deletions

View File

@@ -14,11 +14,6 @@ if not _is_cuda:
from vllm._custom_ops import scaled_fp8_quant
def is_fp8_fnuz() -> bool:
    """Return True if device 0's gcnArchName contains "gfx94" (MI300-class GPUs).

    Only device 0 is inspected; this assumes MI300 platforms are homogeneous.
    """
    arch_name = torch.cuda.get_device_properties(0).gcnArchName
    return arch_name.find("gfx94") != -1
def is_layer_skipped(
prefix: str,
ignored_layers: List[str],