initial commit for qwen3.6-moe adaptation

This commit is contained in:
2026-06-12 10:10:49 +08:00
parent 365da18436
commit 629f878c28
6 changed files with 560 additions and 49 deletions

View File

@@ -1,9 +1,10 @@
"""
Patches transformers 4.55.3 to register the qwen3_5 model type.
Patches transformers 4.55.3 to register qwen3_5 and qwen3_5_moe model types.
Deploy steps on the remote machine:
1. cp -r modified_scripts/qwen3_5 /usr/local/lib/python3.10/site-packages/transformers/models/qwen3_5
2. python3 modified_scripts/patch_transformers_qwen3_5.py
1. cp -r modified_scripts/qwen3_5 /usr/local/lib/python3.10/site-packages/transformers/models/qwen3_5
2. cp -r modified_scripts/qwen3_5_moe /usr/local/lib/python3.10/site-packages/transformers/models/qwen3_5_moe
3. python3 modified_scripts/patch_transformers_qwen3_5.py
Target: pip-installed transformers at /usr/local/lib/python3.10/site-packages/transformers/
(Not the corex pre-installed path at /usr/local/corex/lib64/python3/dist-packages/)
@@ -40,24 +41,23 @@ def patch_file(path, replacements):
def main():
print(f"=== Patching {AUTO_CONFIG} ===")
patch_file(AUTO_CONFIG, [
# CONFIG_MAPPING_NAMES: insert qwen3_5 right after qwen3
# CONFIG_MAPPING_NAMES: insert qwen3_5 + qwen3_5_moe right after qwen3
(
'("qwen3", "Qwen3Config"),',
'("qwen3", "Qwen3Config"),\n ("qwen3_5", "Qwen3_5Config"),',
'("qwen3", "Qwen3Config"),\n ("qwen3_5", "Qwen3_5Config"),\n ("qwen3_5_moe", "Qwen3_5MoeConfig"),',
),
# Some versions don't have trailing comma — handle that too
(
'("qwen3", "Qwen3Config")\n',
'("qwen3", "Qwen3Config"),\n ("qwen3_5", "Qwen3_5Config"),\n',
'("qwen3", "Qwen3Config"),\n ("qwen3_5", "Qwen3_5Config"),\n ("qwen3_5_moe", "Qwen3_5MoeConfig"),\n',
),
# MODEL_NAMES_MAPPING (model_type -> human readable name, used by docstring generator)
# MODEL_NAMES_MAPPING (model_type -> human readable name)
(
'("qwen3", "Qwen3"),',
'("qwen3", "Qwen3"),\n ("qwen3_5", "Qwen3_5"),',
'("qwen3", "Qwen3"),\n ("qwen3_5", "Qwen3_5"),\n ("qwen3_5_moe", "Qwen3_5_MoE"),',
),
(
'("qwen3", "Qwen3")\n',
'("qwen3", "Qwen3"),\n ("qwen3_5", "Qwen3_5"),\n',
'("qwen3", "Qwen3"),\n ("qwen3_5", "Qwen3_5"),\n ("qwen3_5_moe", "Qwen3_5_MoE"),\n',
),
])
@@ -65,7 +65,7 @@ def main():
patch_file(MODELS_INIT, [
(
"from .qwen3 import *\n",
"from .qwen3 import *\n from .qwen3_5 import *\n",
"from .qwen3 import *\n from .qwen3_5 import *\n from .qwen3_5_moe import *\n",
),
])
@@ -74,19 +74,39 @@ def main():
try:
import importlib.util, types
# Quick smoke-test: import the config class directly
spec = importlib.util.spec_from_file_location(
"configuration_qwen3_5",
def _load_config_mod(module_name, file_path):
    """Execute the configuration file at ``file_path`` as module ``module_name``.

    Before executing, placeholder entries are registered in ``sys.modules``
    for ``transformers``, ``transformers.configuration_utils``,
    ``transformers.models`` and the model's own sub-package, so that relative
    imports inside the configuration file have parent packages to resolve
    against. Entries that already exist in ``sys.modules`` are left as they
    are: an existing ``__path__`` or ``PretrainedConfig`` is never overwritten.
    The placeholders are not removed afterwards.

    Args:
        module_name: Fully qualified dotted name, e.g.
            ``"transformers.models.qwen3_5.configuration_qwen3_5"``. The
            second-to-last component is taken as the model package name.
        file_path: Path of the ``.py`` file to execute.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for ``file_path``.
        Exception: Anything raised while executing the module body propagates.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {file_path}")
    mod = importlib.util.module_from_spec(spec)
    # Relative imports are resolved against __package__, i.e. the parent
    # package of the module being loaded.
    mod.__package__ = module_name.rpartition(".")[0]

    # Top-level package placeholder; only give it a search path if it has none
    # so an already-registered package is not modified.
    pkg = sys.modules.setdefault("transformers", types.ModuleType("transformers"))
    if not hasattr(pkg, "__path__"):
        pkg.__path__ = [TRANSFORMERS_ROOT]

    # Minimal base class for the config classes. Installed only when the
    # registered module does not already provide PretrainedConfig.
    cu = sys.modules.setdefault(
        "transformers.configuration_utils",
        types.ModuleType("transformers.configuration_utils"),
    )
    if not hasattr(cu, "PretrainedConfig"):
        class _PC:
            def __init__(self, **kwargs):
                pass

        cu.PretrainedConfig = _PC

    # Each intermediate package searches its own directory, not the
    # transformers root, so sibling-relative imports look in the right place.
    model_pkg = module_name.split(".")[-2]
    stub_paths = {
        "transformers.models": f"{TRANSFORMERS_ROOT}/models",
        f"transformers.models.{model_pkg}": f"{TRANSFORMERS_ROOT}/models/{model_pkg}",
    }
    for sub, sub_path in stub_paths.items():
        m = sys.modules.setdefault(sub, types.ModuleType(sub))
        if not hasattr(m, "__path__"):
            m.__path__ = [sub_path]

    spec.loader.exec_module(mod)
    return mod
mod27 = _load_config_mod(
"transformers.models.qwen3_5.configuration_qwen3_5",
f"{TRANSFORMERS_ROOT}/models/qwen3_5/configuration_qwen3_5.py",
)
mod = importlib.util.module_from_spec(spec)
# Provide minimal parent package stubs so relative imports resolve
pkg = types.ModuleType("transformers")
pkg.__path__ = [TRANSFORMERS_ROOT]
sys.modules.setdefault("transformers", pkg)
spec.loader.exec_module(mod)
cfg = mod.Qwen3_5Config()
print(f" Qwen3_5Config() smoke-test OK (model_type={cfg.model_type})")
cfg = mod27.Qwen3_5Config()
print(f" Qwen3_5Config() smoke-test OK (model_type={cfg.model_type})")
mod35 = _load_config_mod(
"transformers.models.qwen3_5_moe.configuration_qwen3_5_moe",
f"{TRANSFORMERS_ROOT}/models/qwen3_5_moe/configuration_qwen3_5_moe.py",
)
moe_cfg = mod35.Qwen3_5MoeConfig()
print(f" Qwen3_5MoeConfig() smoke-test OK (model_type={moe_cfg.model_type})")
t = moe_cfg.text_config
print(f" num_experts={t.num_experts}, top_k={t.num_experts_per_tok}, "
f"shared={t.shared_expert_intermediate_size}, layers={t.num_hidden_layers}")
except Exception as e:
print(f" [warn] smoke-test failed (may be fine at runtime): {e}")