106 lines
3.6 KiB
Python
106 lines
3.6 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
from collections.abc import Iterable, Mapping
|
|
from types import MappingProxyType
|
|
from typing import Any, Optional
|
|
|
|
import regex as re
|
|
|
|
|
|
def deep_compare(dict1: Any, dict2: Any) -> bool:
    """Recursively compare two values for structural equality.

    Rules, applied in order:

    * Values of different concrete types are never equal.
    * Dicts are equal when they have the same keys and every pair of
      values compares equal under ``deep_compare``.
    * Lists are compared as sets: element order and duplicates are
      ignored.
    * Anything else is compared with ``==``.

    Args:
        dict1: First value (usually a dict, but any value is accepted).
        dict2: Second value.

    Returns:
        True if the two values are equal under the rules above.
    """
    if type(dict1) is not type(dict2):
        return False

    if isinstance(dict1, dict):
        if dict1.keys() != dict2.keys():
            return False
        return all(deep_compare(dict1[k], dict2[k]) for k in dict1)

    if isinstance(dict1, list):
        try:
            return set(dict1) == set(dict2)
        except TypeError:
            # At least one element is unhashable (e.g. a nested dict or
            # list), so set() cannot be used. Keep the same set-like
            # semantics: every element of one list must have a
            # deep_compare-equal counterpart in the other, both ways.
            return (all(
                any(deep_compare(left, right) for right in dict2)
                for left in dict1) and all(
                    any(deep_compare(right, left) for left in dict1)
                    for right in dict2))

    return dict1 == dict2
|
|
|
|
|
|
def should_ignore_layer(
    layer_name: Optional[str],
    ignore: Iterable[str],
    fused_mapping: Mapping[str, list[str]] = MappingProxyType({})
) -> bool:
    """Decide whether a layer is covered by the ``ignore`` list.

    Args:
        layer_name: Dotted module name, e.g.
            ``model.layers.0.self_attn.qkv_proj``. ``None`` is never
            ignored.
        ignore: Targets to test against; each is either an exact layer
            name or a ``re:``-prefixed regular expression (see
            ``check_equal_or_regex_match``).
        fused_mapping: Maps a fused projection name (the last dotted
            component of ``layer_name``) to the names of its unfused
            shard projections.

    Returns:
        True if the layer (or, for a fused layer, every one of its
        shards) matches a target in ``ignore``; False otherwise.

    Raises:
        ValueError: If the shards of a fused layer disagree on whether
            they are ignored, or if ``fused_mapping`` lists no shards for
            the fused projection.
    """
    if layer_name is None:
        return False

    # layer_name = model.layers.0.self_attn.qkv_proj
    # proj_name = qkv_proj
    proj_name = layer_name.split(".")[-1]

    # Unfused layers like down_proj and o_proj will match
    # the safetensors checkpoint already.
    if proj_name not in fused_mapping:
        return check_equal_or_regex_match(layer_name=layer_name,
                                          targets=ignore)

    # Fused layers like gate_up_proj or qkv_proj will not be fused
    # in the safetensors checkpoint. So, we convert the name
    # from the fused version to unfused + check to make sure that
    # each shard of the fused layer has the same scheme.
    shard_proj_names = fused_mapping[proj_name]
    if not shard_proj_names:
        # Raise explicitly rather than relying on an assert, which is
        # stripped under ``python -O`` and would let None be returned.
        raise ValueError(
            f"Fused mapping for {proj_name} in {layer_name} does not "
            "list any shard projections.")

    # Swap only the trailing component. str.replace() would also rewrite
    # earlier occurrences of proj_name inside the module path.
    prefix = layer_name[:len(layer_name) - len(proj_name)]

    # ``ignore`` is scanned once per shard; materialize it so a one-shot
    # iterator is not exhausted after the first shard.
    ignore_targets = tuple(ignore)

    # Layer should be ignored if shards are ignored.
    layer_ignored: Optional[bool] = None
    for shard_proj_name in shard_proj_names:
        shard_ignored = check_equal_or_regex_match(
            layer_name=prefix + shard_proj_name, targets=ignore_targets)

        if layer_ignored is None:
            # First shard sets the expected answer for the whole layer.
            layer_ignored = shard_ignored
        elif shard_ignored != layer_ignored:
            # Later shards must agree with the first one.
            raise ValueError(f"Found a different quantization schemes for "
                             f"{shard_proj_names} in {layer_name}. vLLM "
                             "requires all to use the same scheme.")

    # The shard list is non-empty, so the loop assigned a bool.
    return bool(layer_ignored)
|
|
|
|
|
|
def check_equal_or_regex_match(layer_name: str,
                               targets: Iterable[str]) -> bool:
    """Return True if ``layer_name`` matches at least one target.

    Each target is tested with ``_is_equal_or_regex_match``: a target
    starting with ``re:`` is treated as a regular expression, any other
    target must equal ``layer_name`` exactly. Iteration stops at the first
    matching target; an empty ``targets`` yields False.
    """
    return any(
        _is_equal_or_regex_match(layer_name, candidate)
        for candidate in targets)
|
|
|
|
|
|
def _is_equal_or_regex_match(value: str,
|
|
target: str,
|
|
check_contains: bool = False) -> bool:
|
|
"""
|
|
Checks whether a value is exactly equal or a regex match for target
|
|
if target starts with 're:'. If check_contains is set to True,
|
|
additionally checks if the target string is contained within the value.
|
|
"""
|
|
|
|
if target.startswith("re:"):
|
|
pattern = target[3:]
|
|
if re.match(pattern, value):
|
|
return True
|
|
elif check_contains:
|
|
if target.lower() in value.lower():
|
|
return True
|
|
elif target == value:
|
|
return True
|
|
return False
|