Optimized deepseek-v3/r1 model performance on mxfp4 run (#10008)

Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: HAI <hixiao@gmail.com>
Co-authored-by: Hubert Lu <55214931+hubertlu-tw@users.noreply.github.com>
This commit is contained in:
kk
2025-09-05 06:11:22 +08:00
committed by GitHub
parent 93088b6975
commit e96973742c
8 changed files with 486 additions and 64 deletions

View File

@@ -2900,6 +2900,18 @@ def mxfp_supported():
return False
@lru_cache(maxsize=1)
def is_gfx95_supported():
    """
    Return True when running on a ROCm (HIP) build whose GPU
    architecture is gfx95* (e.g. gfx950), False otherwise — including
    CUDA and CPU-only builds, where ``torch.version.hip`` is None.

    Cached with ``lru_cache`` because the device architecture cannot
    change within a process.
    """
    if torch.version.hip:
        # gcnArchName is e.g. "gfx950:sramecc+:xnack-"; a substring
        # check covers all gfx95x variants.
        gcn_arch = torch.cuda.get_device_properties(0).gcnArchName
        return "gfx95" in gcn_arch
    return False
# LoRA-related constants and utilities
SUPPORTED_LORA_TARGET_MODULES = [
"q_proj",