diff --git a/python/sglang/srt/models/exaone.py b/python/sglang/srt/models/exaone.py
index 9cddcb34f..bb077f2c8 100644
--- a/python/sglang/srt/models/exaone.py
+++ b/python/sglang/srt/models/exaone.py
@@ -323,27 +323,6 @@ class ExaoneForCausalLM(nn.Module):
         sample_output = self.sampler(logits_output, input_metadata.sampling_info)
         return sample_output, logits_output
 
-    def get_module_name(self, name):
-        stacked_params_mapping = [
-            # (param_name, shard_name, shard_id, num_shard)
-            ("qkv_proj", "q_proj", "q", 3),
-            ("qkv_proj", "k_proj", "k", 3),
-            ("qkv_proj", "v_proj", "v", 3),
-            ("gate_up_proj", "c_fc_0", 0, 2),
-            ("gate_up_proj", "c_fc_1", 1, 2),
-        ]
-        for param_name, weight_name, shard_id, num_shard in stacked_params_mapping:
-            if weight_name in name:
-                return (
-                    name.replace(weight_name, param_name)[: -len(".weight")],
-                    num_shard,
-                )
-        return name[: -len(".weight")], 1
-
-    def get_num_params(self):
-        params_dict = dict(self.named_parameters())
-        return len(params_dict)
-
     def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
@@ -357,13 +336,13 @@ class ExaoneForCausalLM(nn.Module):
 
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name or "projector" in name:
-                return
+                continue
             if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name:
                 # Models trained using ColossalAI may include these tensors in
                 # the checkpoint. Skip them.
-                return
+                continue
             if name.startswith("model.vision_tower") and name not in params_dict:
-                return
+                continue
             name = name.replace("attn.attention", "self_attn")
 
             for param_name, weight_name, shard_id in stacked_params_mapping:
@@ -380,7 +359,7 @@ class ExaoneForCausalLM(nn.Module):
             else:
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
-                    return
+                    continue
                 param = params_dict[name]
                 weight_loader = getattr(param, "weight_loader", default_weight_loader)
                 weight_loader(param, loaded_weight)
diff --git a/python/sglang/srt/models/llama.py b/python/sglang/srt/models/llama.py
index b875e0c98..926d87db8 100644
--- a/python/sglang/srt/models/llama.py
+++ b/python/sglang/srt/models/llama.py
@@ -334,13 +334,13 @@ class LlamaForCausalLM(nn.Module):
 
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name or "projector" in name:
-                return
+                continue
             if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name:
                 # Models trained using ColossalAI may include these tensors in
                 # the checkpoint. Skip them.
-                return
+                continue
             if name.startswith("model.vision_tower") and name not in params_dict:
-                return
+                continue
 
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
@@ -356,7 +356,7 @@ class LlamaForCausalLM(nn.Module):
             else:
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
-                    return
+                    continue
                 param = params_dict[name]
                 weight_loader = getattr(param, "weight_loader", default_weight_loader)
                 weight_loader(param, loaded_weight)
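
Why the `return` -> `continue` change matters: inside the `for name, loaded_weight in weights:` loop, `return` exits `load_weights` entirely, so every weight after the first skippable tensor (e.g. `rotary_emb.inv_freq`) is silently left unloaded; `continue` skips only that tensor. A minimal, hypothetical sketch of the difference follows (the `load_weights_buggy` / `load_weights_fixed` names and the toy weight list are illustrative, not SGLang code):

```python
from typing import Iterable, List, Tuple


def load_weights_buggy(weights: Iterable[Tuple[str, object]]) -> List[str]:
    loaded = []
    for name, _tensor in weights:
        if "rotary_emb.inv_freq" in name:
            return loaded  # exits the whole function; remaining weights are dropped
        loaded.append(name)
    return loaded


def load_weights_fixed(weights: Iterable[Tuple[str, object]]) -> List[str]:
    loaded = []
    for name, _tensor in weights:
        if "rotary_emb.inv_freq" in name:
            continue  # skip only this tensor, keep loading the rest
        loaded.append(name)
    return loaded


# Toy checkpoint: the inv_freq buffer sits between two real weights.
weights = [
    ("model.layers.0.self_attn.qkv_proj.weight", None),
    ("model.layers.0.self_attn.rotary_emb.inv_freq", None),
    ("model.layers.0.mlp.gate_up_proj.weight", None),
]
print(load_weights_buggy(weights))  # only qkv_proj is loaded
print(load_weights_fixed(weights))  # qkv_proj and gate_up_proj are both loaded
```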