118 lines
4.5 KiB
Python
118 lines
4.5 KiB
Python
"""
Patches transformers 4.55.3 to register qwen3_5 and qwen3_5_moe model types.

Deploy steps on the remote machine:
  1. cp -r modified_scripts/qwen3_5 /usr/local/lib/python3.10/site-packages/transformers/models/qwen3_5
  2. cp -r modified_scripts/qwen3_5_moe /usr/local/lib/python3.10/site-packages/transformers/models/qwen3_5_moe
  3. python3 modified_scripts/patch_transformers_qwen3_5.py

Target: pip-installed transformers at /usr/local/lib/python3.10/site-packages/transformers/
(Not the corex pre-installed path at /usr/local/corex/lib64/python3/dist-packages/)
"""
|
|
|
|
import sys
|
|
|
|
# Root of the pip-installed transformers package that this script patches.
TRANSFORMERS_ROOT = "/usr/local/lib/python3.10/site-packages/transformers"

# Files edited in place by main(): the auto-config registry and the
# models package initialiser.
AUTO_CONFIG = TRANSFORMERS_ROOT + "/models/auto/configuration_auto.py"
MODELS_INIT = TRANSFORMERS_ROOT + "/models/__init__.py"
|
|
|
|
|
|
def patch_file(path, replacements):
    """Apply idempotent, first-occurrence text replacements to a file in place.

    Args:
        path: Path of the text file to patch.
        replacements: Iterable of ``(old, new)`` string pairs, processed in
            order against the progressively updated content.

    For each pair:
      * if ``new`` is already present, the pair is skipped (already patched);
      * else if ``old`` is absent, a warning is printed and the pair is skipped;
      * otherwise the first occurrence of ``old`` is replaced by ``new``.

    The file is rewritten only when at least one replacement was applied.
    Progress is reported on stdout.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The targets are Python sources, which are UTF-8 by default (PEP 3120).
    # Relying on the locale's default encoding can fail on, or silently
    # corrupt, non-ASCII text, so the encoding is pinned for read and write.
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    patched = False
    for old, new in replacements:
        if new in content:
            print(f" [skip] already patched: {repr(new[:60])}")
            continue
        if old not in content:
            print(f" [warn] anchor not found: {repr(old[:60])}")
            continue
        # Only the first occurrence is touched so that an anchor which
        # happens to appear again later in the file is left alone.
        content = content.replace(old, new, 1)
        patched = True
        print(f" [ok] inserted after: {repr(old[:60])}")

    if patched:
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
|
|
|
|
|
|
def main():
    """Register qwen3_5 / qwen3_5_moe in the installed transformers and smoke-test.

    Steps:
      1. In AUTO_CONFIG, insert the two model types right after the ``qwen3``
         entries of the config-class mapping and the display-name mapping.
      2. In MODELS_INIT, add the matching star-imports right after
         ``from .qwen3 import *``.
      3. Load both new configuration modules directly from disk against
         stubbed parent packages and instantiate their config classes.
         This step is best-effort: any failure is printed as a warning and
         does not abort the script.
    """
    # Inserted lines must carry the same indentation as the anchor line they
    # follow. A mismatch is harmless inside the bracketed mapping literals but
    # makes models/__init__.py unparsable when the anchor import sits in an
    # indented block. The widths are spelled out so they cannot be lost.
    # NOTE(review): 8 / 4 spaces match the transformers 4.55 layout - confirm
    # against the installed files before deploying to a different version.
    mapping_sep = ",\n" + " " * 8
    import_sep = "\n" + " " * 4

    config_anchor = '("qwen3", "Qwen3Config")'
    config_patched = mapping_sep.join([
        config_anchor,
        '("qwen3_5", "Qwen3_5Config")',
        '("qwen3_5_moe", "Qwen3_5MoeConfig")',
    ]) + ","

    name_anchor = '("qwen3", "Qwen3")'
    name_patched = mapping_sep.join([
        name_anchor,
        '("qwen3_5", "Qwen3_5")',
        '("qwen3_5_moe", "Qwen3_5_MoE")',
    ]) + ","

    init_anchor = "from .qwen3 import *\n"
    init_patched = import_sep.join([
        "from .qwen3 import *",
        "from .qwen3_5 import *",
        "from .qwen3_5_moe import *",
    ]) + "\n"

    print(f"=== Patching {AUTO_CONFIG} ===")
    patch_file(AUTO_CONFIG, [
        # CONFIG_MAPPING_NAMES: insert qwen3_5 + qwen3_5_moe right after qwen3.
        # Each anchor is tried twice: with a trailing comma, and as a final
        # entry followed directly by a newline. Whichever form is not present
        # is reported as skipped / not found by patch_file; that is expected.
        (config_anchor + ",", config_patched),
        (config_anchor + "\n", config_patched + "\n"),
        # MODEL_NAMES_MAPPING (model_type -> human readable name)
        (name_anchor + ",", name_patched),
        (name_anchor + "\n", name_patched + "\n"),
    ])

    print(f"\n=== Patching {MODELS_INIT} ===")
    patch_file(MODELS_INIT, [
        (init_anchor, init_patched),
    ])

    # Verification
    print("\n=== Verification ===")
    try:
        import importlib.util
        import types

        def _load_config_mod(module_name, file_path):
            """Execute one configuration module from *file_path* and return it.

            Parent packages that are not already in ``sys.modules`` are
            replaced by empty stub modules, and ``PretrainedConfig`` is
            swapped for a no-op base class, so the configuration file is
            executed without going through the regular package import.
            """
            spec = importlib.util.spec_from_file_location(module_name, file_path)
            mod = importlib.util.module_from_spec(spec)
            # Relative imports inside the config file resolve against this.
            mod.__package__ = ".".join(module_name.split(".")[:-1])

            pkg = sys.modules.setdefault("transformers", types.ModuleType("transformers"))
            pkg.__path__ = [TRANSFORMERS_ROOT]

            cu = sys.modules.setdefault(
                "transformers.configuration_utils",
                types.ModuleType("transformers.configuration_utils"),
            )

            class _PC:
                # Minimal stand-in base class: accepts and ignores all kwargs.
                def __init__(self, **kwargs):
                    pass

            cu.PretrainedConfig = _PC

            # NOTE(review): the sub-packages are given the package root as
            # __path__, not their own directories - confirm this is intended.
            for sub in ("transformers.models", f"transformers.models.{module_name.split('.')[-2]}"):
                m = sys.modules.setdefault(sub, types.ModuleType(sub))
                m.__path__ = [TRANSFORMERS_ROOT]

            spec.loader.exec_module(mod)
            return mod

        dense_mod = _load_config_mod(
            "transformers.models.qwen3_5.configuration_qwen3_5",
            f"{TRANSFORMERS_ROOT}/models/qwen3_5/configuration_qwen3_5.py",
        )
        cfg = dense_mod.Qwen3_5Config()
        print(f" Qwen3_5Config() smoke-test OK (model_type={cfg.model_type})")

        moe_mod = _load_config_mod(
            "transformers.models.qwen3_5_moe.configuration_qwen3_5_moe",
            f"{TRANSFORMERS_ROOT}/models/qwen3_5_moe/configuration_qwen3_5_moe.py",
        )
        moe_cfg = moe_mod.Qwen3_5MoeConfig()
        print(f" Qwen3_5MoeConfig() smoke-test OK (model_type={moe_cfg.model_type})")
        t = moe_cfg.text_config
        print(f" num_experts={t.num_experts}, top_k={t.num_experts_per_tok}, "
              f"shared={t.shared_expert_intermediate_size}, layers={t.num_hidden_layers}")
    except Exception as e:
        # Deliberately broad: the smoke test runs against stubbed packages,
        # so a failure here does not prove the patched install is broken.
        print(f" [warn] smoke-test failed (may be fine at runtime): {e}")

    print("\nDone.")
|
|
|
|
|
|
# Run the patch only when executed as a script, never on import.
if __name__ == "__main__":
    main()
|