[XPU][CPU] Enable the native path of DeepSeek (#4086)
Co-authored-by: Zhang, Liangang <liangang.zhang@intel.com>
This commit is contained in:
@@ -108,15 +108,25 @@ def _get_quantization_config(
|
||||
quant_config = get_quant_config(model_config, load_config)
|
||||
major, minor = get_device_capability()
|
||||
|
||||
if major is not None and minor is not None:
|
||||
assert 0 <= minor < 10
|
||||
capability = major * 10 + minor
|
||||
if capability < quant_config.get_min_capability():
|
||||
if not hasattr(quant_config, "get_availability"):
|
||||
# Update VLLM to support get_available
|
||||
if major is not None and minor is not None:
|
||||
assert 0 <= minor < 10
|
||||
capability = major * 10 + minor
|
||||
if capability < quant_config.get_min_capability():
|
||||
raise ValueError(
|
||||
f"The quantization method {model_config.quantization} "
|
||||
"is not supported for the current GPU. "
|
||||
f"Minimum capability: {quant_config.get_min_capability()}. "
|
||||
f"Current capability: {capability}."
|
||||
)
|
||||
else:
|
||||
if not quant_config.get_availability():
|
||||
raise ValueError(
|
||||
f"The quantization method {model_config.quantization} "
|
||||
"is not supported for the current GPU. "
|
||||
f"Minimum capability: {quant_config.get_min_capability()}. "
|
||||
f"Current capability: {capability}."
|
||||
f"Current capability: {major, minor}."
|
||||
)
|
||||
supported_dtypes = quant_config.get_supported_act_dtypes()
|
||||
if model_config.dtype not in supported_dtypes:
|
||||
|
||||
Reference in New Issue
Block a user