add deepseekv3 and llama4
This commit is contained in:
@@ -21,13 +21,10 @@ def set_default_torch_dtype(dtype: torch.dtype):
|
|||||||
def get_model_architecture(
|
def get_model_architecture(
|
||||||
model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
|
model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
|
||||||
architectures = getattr(model_config.hf_config, "architectures", None) or []
|
architectures = getattr(model_config.hf_config, "architectures", None) or []
|
||||||
logger.warning("[DEBUG-ARCH] get_model_architecture: "
|
print(f"[DEBUG-ARCH] get_model_architecture: "
|
||||||
"type(hf_config)=%s, architectures=%s, "
|
f"type(hf_config)={type(model_config.hf_config).__name__}, "
|
||||||
"id(hf_config)=%s, has_text_config=%s",
|
f"architectures={getattr(model_config.hf_config, 'architectures', 'MISSING')}, "
|
||||||
type(model_config.hf_config).__name__,
|
f"id(hf_config)={id(model_config.hf_config)}")
|
||||||
getattr(model_config.hf_config, "architectures", "MISSING"),
|
|
||||||
id(model_config.hf_config),
|
|
||||||
hasattr(model_config.hf_config, "text_config"))
|
|
||||||
# Special handling for quantized Mixtral.
|
# Special handling for quantized Mixtral.
|
||||||
# FIXME(woosuk): This is a temporary hack.
|
# FIXME(woosuk): This is a temporary hack.
|
||||||
mixtral_supported = [
|
mixtral_supported = [
|
||||||
|
|||||||
Reference in New Issue
Block a user