Upgrade to vllm 0.17.0 corex v4.1 overlay

Date: 2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

@@ -164,7 +164,7 @@ class Ernie4_5_MoeMoE(nn.Module):
             config.hidden_size,
             config.moe_num_experts,
             bias=False,
-            params_dtype=torch.float32,
+            # params_dtype=torch.float32,
             quant_config=None,
             prefix=f"{prefix}.gate",
         )
@@ -209,7 +209,7 @@ class Ernie4_5_MoeMoE(nn.Module):
         hidden_dim = hidden_states.shape[-1]
         hidden_states = hidden_states.view(-1, hidden_dim)
-        router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
+        router_logits, _ = self.gate(hidden_states)
         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
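
Together, the two hunks above drop the float32 override on the MoE router gate: its parameters now stay in the model's default dtype, and the routing logits are computed without an explicit upcast of the activations. The snippet below is a minimal sketch of that behavioural difference, using a plain nn.Linear as a stand-in for vLLM's ReplicatedLinear and bfloat16 as the assumed model dtype; these names are illustrative, not taken from the patch itself.

import torch
import torch.nn as nn

hidden_states = torch.randn(4, 8, dtype=torch.bfloat16)

# Old path: router weights held in fp32, activations upcast before the matmul.
gate_fp32 = nn.Linear(8, 2, bias=False, dtype=torch.float32)
logits_fp32 = gate_fp32(hidden_states.to(dtype=torch.float32))

# New path: router weights follow the model dtype, no cast on the activations.
gate_bf16 = nn.Linear(8, 2, bias=False, dtype=torch.bfloat16)
logits_bf16 = gate_bf16(hidden_states)

print(logits_fp32.dtype, logits_bf16.dtype)  # torch.float32 torch.bfloat16
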
@@ -429,7 +429,8 @@ class Ernie4_5_MoeModel(nn.Module):
         self.num_redundant_experts = eplb_config.num_redundant_experts
-        if get_pp_group().is_first_rank:
+        if get_pp_group().is_first_rank or (config.tie_word_embeddings
+                                            and get_pp_group().is_last_rank):
             self.embed_tokens = VocabParallelEmbedding(
                 config.vocab_size,
                 config.hidden_size,
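
With tied word embeddings under pipeline parallelism, the last stage needs the embedding table in order to share it with lm_head, so the hunk above instantiates embed_tokens on the first rank and, when config.tie_word_embeddings is set, on the last rank as well. A small sketch of that condition, with plain booleans standing in for the real get_pp_group() calls:

def needs_embed_tokens(is_first_rank: bool, is_last_rank: bool,
                       tie_word_embeddings: bool) -> bool:
    # Old behaviour: only the first pipeline stage owned the embeddings.
    # New behaviour: the last stage owns them too when they are tied to lm_head.
    return is_first_rank or (tie_word_embeddings and is_last_rank)

# With 4 pipeline stages and tied embeddings, ranks 0 and 3 hold the table.
print([needs_embed_tokens(r == 0, r == 3, True) for r in range(4)])
# [True, False, False, True]
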
@@ -653,11 +654,11 @@ class Ernie4_5_MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, MixtureOfExpe
                 quant_config=quant_config,
                 prefix=maybe_prefix(prefix, "lm_head"),
             )
+            if self.config.tie_word_embeddings:
+                self.lm_head.weight = self.model.embed_tokens.weight
         else:
             self.lm_head = PPMissingLayer()
-        if self.config.tie_word_embeddings:
-            self.lm_head.weight = self.model.embed_tokens.weight
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.make_empty_intermediate_tensors = (
             self.model.make_empty_intermediate_tensors
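
Reading the hunk above as moving the tie assignment into the is_last_rank branch, lm_head now shares the embedding weight only on the last pipeline rank, where the previous hunk guarantees embed_tokens actually exists; other ranks keep the PPMissingLayer placeholder. Below is a condensed sketch of that construction path, using nn.Linear and a dummy placeholder class instead of vLLM's ParallelLMHead and PPMissingLayer:

import torch.nn as nn

class _Missing(nn.Module):
    # Stand-in for vLLM's PPMissingLayer on ranks that do not own lm_head.
    pass

def build_lm_head(is_last_rank: bool, tie_word_embeddings: bool,
                  embed_tokens: nn.Embedding, hidden: int, vocab: int) -> nn.Module:
    if is_last_rank:
        lm_head = nn.Linear(hidden, vocab, bias=False)  # stand-in for ParallelLMHead
        if tie_word_embeddings:
            # Safe only here: the last rank was just given embed_tokens above.
            lm_head.weight = embed_tokens.weight
        return lm_head
    return _Missing()

embed = nn.Embedding(32, 8)
head = build_lm_head(True, True, embed, hidden=8, vocab=32)
print(head.weight is embed.weight)  # True: the weights are shared, not copied
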