[FEAT] Support GGUF format (#2215)

Co-authored-by: Yang Zheng(SW)(Alex) <you@example.com>
2024-11-30 16:44:48 +08:00
parent 0d6a49bd7d
commit 883c955489
39 changed files with 180 additions and 89 deletions
--- a/python/sglang/srt/models/minicpm3.py
+++ b/python/sglang/srt/models/minicpm3.py
@@ -585,12 +585,10 @@ class MiniCPM3ForCausalLM(nn.Module):
        hidden_states = self.model(input_ids, positions, forward_batch, input_embeds)
        hidden_states = hidden_states / self.scale_width
        if self.config.tie_word_embeddings:
-            lm_head_weight = self.model.embed_tokens.weight
+            lm_head = self.model.embed_tokens
        else:
-            lm_head_weight = self.lm_head.weight
-        return self.logits_processor(
-            input_ids, hidden_states, lm_head_weight, forward_batch
-        )
+            lm_head = self.lm_head
+        return self.logits_processor(input_ids, hidden_states, lm_head, forward_batch)

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
        stacked_params_mapping = [