Upgrade to vllm 0.17.0 corex v4.1 overlay
@@ -164,7 +164,7 @@ class Ernie4_5_MoeMoE(nn.Module):
             config.hidden_size,
             config.moe_num_experts,
             bias=False,
-            params_dtype=torch.float32,
+            # params_dtype=torch.float32,
             quant_config=None,
             prefix=f"{prefix}.gate",
         )
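
The hunk above stops pinning the router gate's parameters to float32; with params_dtype commented out, the gate is created in whatever dtype the model otherwise uses. A minimal before/after sketch of the construction, using nn.Linear as a stand-in for vLLM's ReplicatedLinear and illustrative sizes that are not taken from the diff:

import torch
import torch.nn as nn

hidden_size, moe_num_experts = 2048, 64  # illustrative sizes, not from the diff

# Before this change: router gate weights were pinned to float32
# regardless of the model's compute dtype.
gate_fp32 = nn.Linear(hidden_size, moe_num_experts, bias=False,
                      dtype=torch.float32)

# After this change: the gate inherits the default params dtype
# (bfloat16 here, as an assumption about the model config).
gate_native = nn.Linear(hidden_size, moe_num_experts, bias=False,
                        dtype=torch.bfloat16)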
@@ -209,7 +209,7 @@ class Ernie4_5_MoeMoE(nn.Module):
         hidden_dim = hidden_states.shape[-1]
         hidden_states = hidden_states.view(-1, hidden_dim)

-        router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
+        router_logits, _ = self.gate(hidden_states)

         final_hidden_states = self.experts(
             hidden_states=hidden_states, router_logits=router_logits
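
Together with the previous hunk, this drops the float32 router path entirely: the activations are no longer upcast before the gate. A sketch of the resulting forward pass, again with nn.Linear standing in for ReplicatedLinear and made-up shapes:

import torch
import torch.nn as nn

hidden_size, moe_num_experts = 2048, 64  # illustrative
gate = nn.Linear(hidden_size, moe_num_experts, bias=False,
                 dtype=torch.bfloat16)

hidden_states = torch.randn(8, hidden_size, dtype=torch.bfloat16)
# Before: router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32))
# After:  the activations pass through in the model dtype, no upcast.
router_logits = gate(hidden_states)
# A downstream softmax can still be taken in fp32 for numerical stability.
routing_weights = torch.softmax(router_logits, dim=-1, dtype=torch.float32)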
@@ -429,7 +429,8 @@ class Ernie4_5_MoeModel(nn.Module):

         self.num_redundant_experts = eplb_config.num_redundant_experts

-        if get_pp_group().is_first_rank:
+        if get_pp_group().is_first_rank or (config.tie_word_embeddings
+                                            and get_pp_group().is_last_rank):
             self.embed_tokens = VocabParallelEmbedding(
                 config.vocab_size,
                 config.hidden_size,
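
The effect of this hunk: with tie_word_embeddings set, the last pipeline-parallel rank hosts the LM head, so it now also materializes embed_tokens and has a weight to tie against. A boolean sketch of the placement rule; the function name and plain booleans are mine, standing in for the get_pp_group() calls, and the real code constructs a VocabParallelEmbedding rather than returning True:

def needs_embed_tokens(is_first_rank: bool, is_last_rank: bool,
                       tie_word_embeddings: bool) -> bool:
    # The first pipeline rank always embeds the input ids; the last rank
    # now also needs the table so lm_head.weight can be tied to it.
    return is_first_rank or (tie_word_embeddings and is_last_rank)

assert needs_embed_tokens(False, True, tie_word_embeddings=True)
assert not needs_embed_tokens(False, True, tie_word_embeddings=False)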
@@ -653,11 +654,11 @@ class Ernie4_5_MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, MixtureOfExpe
                 quant_config=quant_config,
                 prefix=maybe_prefix(prefix, "lm_head"),
             )
+            if self.config.tie_word_embeddings:
+                self.lm_head.weight = self.model.embed_tokens.weight
         else:
             self.lm_head = PPMissingLayer()

-        if self.config.tie_word_embeddings:
-            self.lm_head.weight = self.model.embed_tokens.weight
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.make_empty_intermediate_tensors = (
             self.model.make_empty_intermediate_tensors
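
This hunk moves the weight tying inside the last-rank branch: on every other rank, lm_head is a PPMissingLayer placeholder, so the old unconditional assignment attached a weight to a layer that should stay empty. A minimal sketch under stated assumptions, with plain nn modules in place of vLLM's ParallelLMHead / VocabParallelEmbedding and a hand-set rank flag:

import torch.nn as nn

class _MissingLayer(nn.Module):
    """Stand-in for vLLM's PPMissingLayer: a placeholder with no weights."""

is_last_rank = True          # assumption for the sketch
tie_word_embeddings = True
embed_tokens = nn.Embedding(100, 16)

if is_last_rank:
    lm_head = nn.Linear(16, 100, bias=False)
    if tie_word_embeddings:
        # Tying now happens only where a real lm_head exists...
        lm_head.weight = embed_tokens.weight
else:
    # ...because here lm_head carries no weight, and the old unconditional
    # tie would have assigned one to the placeholder.
    lm_head = _MissingLayer()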