Add deepseekv3 and llama4

This commit is contained in:
Chranos
2026-02-11 15:09:59 +08:00
parent 45e1fa8bb3
commit 78814aaa68

View File

@@ -21,13 +21,10 @@ def set_default_torch_dtype(dtype: torch.dtype):
 def get_model_architecture(
         model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
     architectures = getattr(model_config.hf_config, "architectures", None) or []
-    logger.warning("[DEBUG-ARCH] get_model_architecture: "
-                   "type(hf_config)=%s, architectures=%s, "
-                   "id(hf_config)=%s, has_text_config=%s",
-                   type(model_config.hf_config).__name__,
-                   getattr(model_config.hf_config, "architectures", "MISSING"),
-                   id(model_config.hf_config),
-                   hasattr(model_config.hf_config, "text_config"))
+    print(f"[DEBUG-ARCH] get_model_architecture: "
+          f"type(hf_config)={type(model_config.hf_config).__name__}, "
+          f"architectures={getattr(model_config.hf_config, 'architectures', 'MISSING')}, "
+          f"id(hf_config)={id(model_config.hf_config)}")
     # Special handling for quantized Mixtral.
     # FIXME(woosuk): This is a temporary hack.
     mixtral_supported = [