[XPU][CPU] Enable the native path of DeepSeek (#4086)

Co-authored-by: Zhang, Liangang <liangang.zhang@intel.com>
2025-03-13 13:26:29 +08:00
parent c76040e31b
commit 71046fcd71
16 changed files with 501 additions and 223 deletions
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -1,6 +1,7 @@
 # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/quantization/modelopt.py

 import logging
+import sys
 from typing import Any, Dict, List, Optional

 import torch
@@ -20,6 +21,7 @@ from sglang.srt.layers.quantization.base_config import (
    QuantizeMethodBase,
 )
 from sglang.srt.layers.quantization.fp8_utils import apply_fp8_linear
+from sglang.srt.utils import get_device_capability

 # Initialize logger for the module
 logger = logging.getLogger(__name__)
@@ -52,7 +54,20 @@ class ModelOptFp8Config(QuantizationConfig):

    @classmethod
    def get_min_capability(cls) -> int:
-        return 89  # Minimum hardware capability (e.g., Hopper GPUs).
+        if hasattr(torch, "cuda") and torch.cuda.is_available():
+            return 89
+
+        # CUDA not available: sys.maxsize is a placeholder until a vendor sets a real minimum
+        return sys.maxsize
+
+    @classmethod
+    def get_availability(cls) -> bool:
+        major, minor = get_device_capability()
+        if hasattr(torch, "cuda") and torch.cuda.is_available():
+            return major * 10 + minor >= 89  # inclusive, so it agrees with get_min_capability() == 89
+
+        # CUDA not available: report unavailable until a vendor adds its own check here
+        return False

    @classmethod
    def get_config_filenames(cls) -> List[str]: