Add support for DeepSeek-V3 and Llama 4 models
This commit is contained in:
@@ -483,6 +483,7 @@ class _ModelRegistry:
|
|||||||
architectures = [architectures]
|
architectures = [architectures]
|
||||||
if not architectures:
|
if not architectures:
|
||||||
logger.warning("No model architectures are specified")
|
logger.warning("No model architectures are specified")
|
||||||
|
return []
|
||||||
|
|
||||||
return architectures
|
return architectures
|
||||||
|
|
||||||
|
|||||||
@@ -229,6 +229,14 @@ def get_config(
|
|||||||
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
|
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
|
||||||
config.update({"architectures": [model_type]})
|
config.update({"architectures": [model_type]})
|
||||||
|
|
||||||
|
# Some composite config classes (e.g. Llama4Config) may not preserve
|
||||||
|
# the 'architectures' field from config.json. Restore it from the
|
||||||
|
# raw config_dict if needed.
|
||||||
|
if config_format == ConfigFormat.HF:
|
||||||
|
raw_archs = config_dict.get("architectures")
|
||||||
|
if raw_archs and not getattr(config, "architectures", None):
|
||||||
|
config.architectures = raw_archs
|
||||||
|
|
||||||
patch_rope_scaling(config)
|
patch_rope_scaling(config)
|
||||||
|
|
||||||
if trust_remote_code:
|
if trust_remote_code:
|
||||||
|
|||||||
Reference in New Issue
Block a user