[refactor] slightly tidy fp8 module (#5993)

This commit is contained in:
JieXin Liang
2025-05-08 08:28:24 +08:00
committed by GitHub
parent e444c13fb4
commit b70957fcf8
12 changed files with 238 additions and 231 deletions

View File

@@ -14,11 +14,6 @@ if not _is_cuda:
from vllm._custom_ops import scaled_fp8_quant
def is_fp8_fnuz() -> bool:
    """Return True if device 0's gcnArchName contains "gfx94" (MI300-class GPUs).

    Only device 0 is inspected; this assumes MI300 platforms are homogeneous.
    """
    arch_name = torch.cuda.get_device_properties(0).gcnArchName
    return arch_name.find("gfx94") != -1
def is_layer_skipped(
prefix: str,
ignored_layers: List[str],