Revert "[XPU][CPU] Enable the native path of DeepSeek" (#4367)
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/quantization/modelopt.py
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import torch
|
||||
@@ -21,7 +20,6 @@ from sglang.srt.layers.quantization.base_config import (
|
||||
QuantizeMethodBase,
|
||||
)
|
||||
from sglang.srt.layers.quantization.fp8_utils import apply_fp8_linear
|
||||
from sglang.srt.utils import get_device_capability
|
||||
|
||||
# Initialize logger for the module
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -54,20 +52,7 @@ class ModelOptFp8Config(QuantizationConfig):
|
||||
|
||||
@classmethod
def get_min_capability(cls) -> int:
    """Return the minimum GPU compute capability required by this scheme.

    CUDA builds require SM 8.9 (Ada/Hopper-class FP8 support); any
    non-CUDA backend reports ``sys.maxsize`` so the capability gate
    treats the scheme as unsupported there.
    """
    cuda_present = hasattr(torch, "cuda") and torch.cuda.is_available()
    if not cuda_present:
        # Vendors can update
        return sys.maxsize
    return 89
|
||||
|
||||
@classmethod
def get_availability(cls) -> bool:
    """Report whether the current device can run this FP8 scheme.

    Returns:
        True on CUDA devices whose compute capability is at least 8.9
        (the minimum advertised by ``get_min_capability``); False on
        all other backends.
    """
    # NOTE(review): get_device_capability() is queried before the CUDA
    # check to preserve the original call order — confirm it is safe on
    # non-CUDA backends.
    major, minor = get_device_capability()
    if hasattr(torch, "cuda") and torch.cuda.is_available():
        # Use >= so capability 8.9 itself is accepted: get_min_capability
        # returns 89 as the *minimum* sufficient capability, but the
        # original `>` rejected exactly-89 (sm_89 / Ada) devices.
        return major * 10 + minor >= 89
    # Vendors can update
    return False
    # (removed: an unreachable `return 89` left behind after `return False`
    # by the revert/merge — dead code.)
|
||||
|
||||
@classmethod
|
||||
def get_config_filenames(cls) -> List[str]:
|
||||
|
||||
Reference in New Issue
Block a user