readme : model : mtmd : lfm2 improvements (#15476)

* Support untied embeddings

* Increase number of image tokens to 1024

* Add LFM2-VL to readme

* Actually use untied embeddings
Author: Tarek Dakhran
Date: 2025-08-22 09:29:08 +02:00
Committer: GitHub
Parent: a0f98dd604
Commit: e288693669
5 changed files with 11 additions and 4 deletions

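For context on the "untied embeddings" bullets: with tied embeddings, the token-embedding matrix does double duty, mapping token ids to vectors on the way in and projecting hidden states back to vocabulary logits on the way out. Untied checkpoints instead ship a separate output matrix, which this commit loads and uses. A minimal sketch of the distinction, with std::vector matrices standing in for real tensors (all names here are illustrative, not llama.cpp's):

```cpp
// Sketch only: tied vs. untied output projection.
#include <cstdio>
#include <vector>

using Matrix = std::vector<std::vector<float>>; // [n_vocab][n_embd]

// lm_head: logits[v] = dot(W[v], hidden) for every vocabulary row v.
static std::vector<float> project(const Matrix & W, const std::vector<float> & hidden) {
    std::vector<float> logits(W.size(), 0.0f);
    for (size_t v = 0; v < W.size(); ++v) {
        for (size_t i = 0; i < hidden.size(); ++i) {
            logits[v] += W[v][i] * hidden[i];
        }
    }
    return logits;
}

int main() {
    const size_t n_vocab = 4, n_embd = 3;
    const Matrix tok_embd(n_vocab, std::vector<float>(n_embd, 0.5f)); // input lookup table

    const Matrix & output_tied = tok_embd; // tied: the same weights do both jobs
    const Matrix output_untied(n_vocab, std::vector<float>(n_embd, 0.25f)); // untied: separate lm_head

    const std::vector<float> hidden = {1.0f, 2.0f, 3.0f};
    std::printf("tied logit[0] = %.2f, untied logit[0] = %.2f\n",
                project(output_tied, hidden)[0], project(output_untied, hidden)[0]);
    return 0;
}
```

Tying halves the vocabulary-related parameter count but forces both roles to share one set of weights; checkpoints trained with a separate lm_head need the loader support added in the diff below.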

@@ -5474,8 +5474,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_LFM2:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
+                    output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
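The hunk above loads output with TENSOR_NOT_REQUIRED, so its absence is not an error; when a tied-embedding GGUF carries no separate output tensor, the loader falls back to duplicating the token-embedding tensor, keeping previously converted models working. A small self-contained sketch of that fallback pattern, with a std::map standing in for the model's tensor table (types and names are mine, not llama.cpp's):

```cpp
// Sketch of the optional-tensor-with-fallback pattern used above.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

using Tensor = std::vector<float>;

// Returns the named tensor, or nullptr when it is absent ("not required").
static const Tensor * find_tensor(const std::map<std::string, Tensor> & file,
                                  const std::string & name) {
    const auto it = file.find(name);
    return it == file.end() ? nullptr : &it->second;
}

int main() {
    // A tied-embedding checkpoint ships no separate output tensor.
    const std::map<std::string, Tensor> file = {
        {"token_embd.weight", Tensor(12, 0.5f)},
    };

    const Tensor * output = find_tensor(file, "output.weight");
    if (output == NULL) {
        // Mirrors the TENSOR_DUPLICATED fallback in the hunk above:
        // reuse the input embedding table as the lm_head.
        output = find_tensor(file, "token_embd.weight");
        std::printf("output.weight absent, reusing token_embd.weight\n");
    }
    std::printf("lm_head tensor has %zu elements\n", output->size());
    return 0;
}
```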
@@ -17787,8 +17792,7 @@ struct llm_build_lfm2 : public llm_graph_context {
         cb(cur, "model.embedding_norm", -1);
         res->t_embd = cur;
 
-        // lm_head is tied with embeddings
-        cur = build_lora_mm(model.tok_embd, cur);
+        cur = build_lora_mm(model.output, cur);
 
         cb(cur, "lm_head", -1);
         res->t_logits = cur;
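With the loader fallback above, this graph change is behavior-preserving for tied checkpoints: a TENSOR_DUPLICATED output refers to the same weights as tok_embd, so build_lora_mm(model.output, cur) computes the same logits as the old tok_embd path, while genuinely untied checkpoints now get their dedicated lm_head. A tiny sketch of that aliasing invariant (illustrative struct, not llama.cpp's):

```cpp
// Sketch: why routing the lm_head through model.output is a no-op when tied.
#include <cassert>
#include <cstdio>

// Illustrative stand-in for the model's tensor pointers.
struct Model {
    const float * tok_embd;
    const float * output;
};

int main() {
    float shared[6]   = {0};
    float separate[6] = {0};

    const Model tied   = { shared, shared };   // duplicated: output aliases tok_embd
    const Model untied = { shared, separate }; // untied: independent lm_head weights

    // For tied checkpoints the matmul operand is literally the same tensor,
    // so the computed logits are unchanged by the graph switch.
    assert(tied.tok_embd == tied.output);
    assert(untied.tok_embd != untied.output);
    std::printf("tied aliases embeddings: %d, untied aliases embeddings: %d\n",
                tied.tok_embd == tied.output, untied.tok_embd == untied.output);
    return 0;
}
```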