[refactor] slightly tidy fp8 module (#5993)
This commit is contained in:
@@ -14,11 +14,6 @@ if not _is_cuda:
|
||||
from vllm._custom_ops import scaled_fp8_quant
|
||||
|
||||
|
||||
def is_fp8_fnuz() -> bool:
    """Return True when the GPU's FP8 format is the fnuz variant (gfx94x, i.e. MI300-class).

    Only device 0 is inspected; this assumes MI300 platforms are
    homogeneous (all devices share the same architecture).
    """
    arch_name = torch.cuda.get_device_properties(0).gcnArchName
    return "gfx94" in arch_name
|
||||
|
||||
|
||||
def is_layer_skipped(
|
||||
prefix: str,
|
||||
ignored_layers: List[str],
|
||||
|
||||
Reference in New Issue
Block a user