forked from EngineX-Cambricon/enginex-mlu370-vllm
add deepseekv3 and llama4
This commit is contained in:
@@ -21,10 +21,6 @@ def set_default_torch_dtype(dtype: torch.dtype):
|
|||||||
def get_model_architecture(
|
def get_model_architecture(
|
||||||
model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
|
model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
|
||||||
architectures = getattr(model_config.hf_config, "architectures", None) or []
|
architectures = getattr(model_config.hf_config, "architectures", None) or []
|
||||||
print(f"[DEBUG-ARCH] get_model_architecture: "
|
|
||||||
f"type(hf_config)={type(model_config.hf_config).__name__}, "
|
|
||||||
f"architectures={getattr(model_config.hf_config, 'architectures', 'MISSING')}, "
|
|
||||||
f"id(hf_config)={id(model_config.hf_config)}")
|
|
||||||
# Special handling for quantized Mixtral.
|
# Special handling for quantized Mixtral.
|
||||||
# FIXME(woosuk): This is a temporary hack.
|
# FIXME(woosuk): This is a temporary hack.
|
||||||
mixtral_supported = [
|
mixtral_supported = [
|
||||||
|
|||||||
@@ -450,7 +450,10 @@ class Llama4ForCausalLM(nn.Module, SupportsPP):
|
|||||||
# which has text_config sub-config. Extract it for text model.
|
# which has text_config sub-config. Extract it for text model.
|
||||||
text_config = getattr(config, "text_config", None)
|
text_config = getattr(config, "text_config", None)
|
||||||
if text_config is not None:
|
if text_config is not None:
|
||||||
|
orig_archs = getattr(config, "architectures", None)
|
||||||
vllm_config.model_config.hf_config = text_config
|
vllm_config.model_config.hf_config = text_config
|
||||||
|
if orig_archs and not getattr(text_config, "architectures", None):
|
||||||
|
text_config.architectures = orig_archs
|
||||||
config = text_config
|
config = text_config
|
||||||
quant_config = vllm_config.quant_config
|
quant_config = vllm_config.quant_config
|
||||||
lora_config = vllm_config.lora_config
|
lora_config = vllm_config.lora_config
|
||||||
|
|||||||
@@ -230,11 +230,6 @@ def get_config(
|
|||||||
config.update({"architectures": [model_type]})
|
config.update({"architectures": [model_type]})
|
||||||
|
|
||||||
# Architecture mapping for models without explicit architectures field
|
# Architecture mapping for models without explicit architectures field
|
||||||
logger.warning("[DEBUG-ARCH] after load: type(config)=%s, "
|
|
||||||
"config.architectures=%s, config.model_type=%s",
|
|
||||||
type(config).__name__,
|
|
||||||
getattr(config, "architectures", "MISSING"),
|
|
||||||
getattr(config, "model_type", "MISSING"))
|
|
||||||
if not getattr(config, "architectures", None):
|
if not getattr(config, "architectures", None):
|
||||||
if config.model_type not in MODEL_MAPPING_NAMES:
|
if config.model_type not in MODEL_MAPPING_NAMES:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
|
|||||||
Reference in New Issue
Block a user