Optimized deepseek-v3/r1 model performance on mxfp4 run (#10008)
Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: HAI <hixiao@gmail.com>
Co-authored-by: Hubert Lu <55214931+hubertlu-tw@users.noreply.github.com>
This commit is contained in:
@@ -2900,6 +2900,18 @@ def mxfp_supported():
|
||||
return False
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def is_gfx95_supported():
    """Return True if the current GPU is an AMD gfx95-series device.

    Only meaningful on ROCm builds of PyTorch (``torch.version.hip`` is
    set); on CUDA/CPU builds this always returns False. The result is
    cached (``maxsize=1``) because the device architecture cannot change
    within a process's lifetime.
    """
    if not torch.version.hip:
        return False
    # gcnArchName is e.g. "gfx950"; a substring test covers the whole
    # gfx95 family without enumerating each variant.
    gcn_arch = torch.cuda.get_device_properties(0).gcnArchName
    return "gfx95" in gcn_arch
|
||||
|
||||
|
||||
# LoRA-related constants and utilities
|
||||
SUPPORTED_LORA_TARGET_MODULES = [
|
||||
"q_proj",
|
||||
|
||||
Reference in New Issue
Block a user