Support OCP MXFP4 quantization on AMD GPUs (#8255)

Co-authored-by: wunhuang <wunhuang@amd.com> Co-authored-by: Hubert Lu <Hubert.Lu@amd.com>
2025-08-05 09:14:52 +08:00
parent 7cb20754fa
commit d4bf5a8524
12 changed files with 1159 additions and 1 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -2832,6 +2832,17 @@ def parse_module_path(module_path, function_name, create_dummy):
    return final_module, None


+def mxfp_supported():
+    """
+    Returns whether the current platform supports MX types.
+    """
+    if torch.version.hip:
+        gcn_arch = torch.cuda.get_device_properties(0).gcnArchName
+        return any(gfx in gcn_arch for gfx in ["gfx95"])
+    else:
+        return False
+
+
 # LoRA-related constants and utilities
 SUPPORTED_LORA_TARGET_MODULES = [
    "q_proj",