[feature]Ascend quantization support (#7791)

Co-authored-by: ichernob <ichernobnn@gmail.com>
Co-authored-by: liupeng <liupeng374@huawei.com>
This commit is contained in:
ronnie_zheng
2025-07-10 19:17:37 +03:00
committed by GitHub
parent 4a0d19198b
commit 766392c6bd
13 changed files with 889 additions and 34 deletions

View File

@@ -197,7 +197,7 @@ def get_int_env_var(name: str, default: int = 0) -> int:
def support_triton(backend: str) -> bool:
return backend not in ["torch_native", "intel_amx"]
return backend not in ["torch_native", "intel_amx", "ascend"]
try:
@@ -2782,3 +2782,101 @@ def lru_cache_frozenset(maxsize=128):
return wrapper
return decorator
def apply_module_patch(target_module, target_function, wrappers):
original_module, original_function = parse_module_path(
target_module, target_function, False
)
original_function_id = id(original_function)
candidate = original_function
for wrapper in wrappers:
candidate = wrapper(candidate)
if target_function is not None:
setattr(original_module, target_function, candidate)
for key, value in sys.modules.copy().items():
if (
target_function is not None
and hasattr(value, target_function)
and id(getattr(value, target_function)) == original_function_id
):
setattr(value, target_function, candidate)
def parse_module_path(module_path, function_name, create_dummy):
from importlib.machinery import ModuleSpec
def create_dummy_module(full_path, parent=None):
"""Create and register a placeholder module"""
dummy = types.ModuleType(full_path)
dummy.__file__ = "vllm_ascend.dummy_module.py"
dummy.__spec__ = ModuleSpec(full_path, None)
sys.modules[full_path] = dummy
if parent:
setattr(parent, full_path.split(".")[-1], dummy)
return dummy
def create_placeholder_function(func_name):
"""Create dummy function that raises when called"""
def placeholder(*args, **kwargs):
raise NotImplementedError(f"Function {func_name} is a placeholder")
placeholder.__name__ = func_name
return placeholder
modules = module_path.split(".")
current_module = None
processed_path = []
for idx, part in enumerate(modules):
current_path = ".".join(modules[: idx + 1])
parent_path = ".".join(modules[:idx]) if idx > 0 else None
try:
current_module = importlib.import_module(current_path)
except ModuleNotFoundError:
# Handle missing module
parent = importlib.import_module(parent_path) if parent_path else None
if parent and hasattr(parent, part):
# Use existing attribute from parent
current_module = getattr(parent, part)
# Check for early function resolution
if function_name and hasattr(current_module, function_name):
return current_module, getattr(current_module, function_name)
if function_name and create_dummy:
ph_func = create_placeholder_function(function_name)
setattr(current_module, function_name, ph_func)
return current_module, ph_func
if function_name:
raise AttributeError(
f"Function {function_name} missing in {current_path}"
)
else:
if not create_dummy:
raise
# Create and register dummy module
current_module = create_dummy_module(
current_path,
parent=(
importlib.import_module(parent_path) if parent_path else None
),
)
processed_path.append(part)
# Final function handling
final_module = sys.modules[module_path]
if function_name is not None:
if not hasattr(final_module, function_name):
if create_dummy:
ph_func = create_placeholder_function(function_name)
setattr(final_module, function_name, ph_func)
else:
setattr(final_module, function_name, None)
return final_module, getattr(final_module, function_name)
return final_module, None