From 629fc4dfd5b635350d681e0293415fc29756c5f8 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 29 Apr 2026 06:30:35 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF Source: Original Platform --- .gitattributes | 44 ++++++++ ...2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf | 3 + ...-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf | 3 + ...-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf | 3 + ...-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf | 3 + ...2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf | 3 + ...2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf | 3 + LFM2.5-1.2B-Instruct.Q4_K_M.gguf | 3 + LFM2.5-1.2B-Instruct.Q5_K_M.gguf | 3 + LFM2.5-1.2B-Instruct.Q8_0.gguf | 3 + README.md | 105 ++++++++++++++++++ config.json | 62 +++++++++++ 12 files changed, 238 insertions(+) create mode 100644 .gitattributes create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf create mode 100644 LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf create mode 100644 LFM2.5-1.2B-Instruct.Q4_K_M.gguf create mode 100644 LFM2.5-1.2B-Instruct.Q5_K_M.gguf create mode 100644 LFM2.5-1.2B-Instruct.Q8_0.gguf create mode 100644 README.md create mode 100644 config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..2ab374e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,44 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs 
merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf filter=lfs diff=lfs 
merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf new file mode 100644 index 0000000..066312b --- /dev/null +++ b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8e2c49ce5287529babf80c3a6567fcf3f606ab166b0485c59f3aa5e4e17c5f +size 483396768 diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf new file mode 100644 index 0000000..142f495 --- /dev/null +++ b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c855d8378c5bd59818446b146ea17133af31335cd11d505b16dc0d9ce2fae3 +size 600345760 diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf new file mode 100644 index 0000000..64a570a --- /dev/null +++ b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:172d49168e77cdda0d43ba14658537afda16dc7699e93756def3ff18b0f769b4 +size 730893472 diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf new file mode 100644 index 0000000..7956d2f --- /dev/null +++ b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ce8b3d3b5c0f74bf2a517336572b11f242d3691c3ad479b1c2f1a3b5d5d25a +size 843353248 diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf new file mode 100644 index 0000000..8ad7497 --- /dev/null +++ 
b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfe31a7394ff6a5513bbbafff6171a700a9d20e3e6ec1854b7b49ce5c8e32d5 +size 962841760 diff --git a/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf new file mode 100644 index 0000000..3188c92 --- /dev/null +++ b/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def44364dbee1fba76deeefecb525f9d988ec37430a8ecf876096da6b1471cf2 +size 1246252192 diff --git a/LFM2.5-1.2B-Instruct.Q4_K_M.gguf b/LFM2.5-1.2B-Instruct.Q4_K_M.gguf new file mode 100644 index 0000000..52a23ef --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c22dc465cdd2c6fefd917ff4b90827ac30f9fd9c946bb5c09c31d742f335fb5 +size 730893472 diff --git a/LFM2.5-1.2B-Instruct.Q5_K_M.gguf b/LFM2.5-1.2B-Instruct.Q5_K_M.gguf new file mode 100644 index 0000000..92a30dd --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46473d1abe107a0061330ecc01e6abe998723303c2752098e91c900783209a47 +size 843353248 diff --git a/LFM2.5-1.2B-Instruct.Q8_0.gguf b/LFM2.5-1.2B-Instruct.Q8_0.gguf new file mode 100644 index 0000000..88d1afb --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e9da3d0d35a3dda79cc56995d637b289ec8d1c28a53e5505447f2a72d1ea2e +size 1246252192 diff --git a/README.md b/README.md new file mode 100644 index 0000000..38d5b0f --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +--- +base_model: ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth +tags: +- gguf +- llama.cpp +- unsloth +- lfm2 +- function-calling +- quantized +license: apache-2.0 +language: +- en +datasets: +- Salesforce/xlam-function-calling-60k +pipeline_tag: text-generation +--- + +# 
LFM2.5-1.2B-xLAM-Unsloth — GGUF quantized + +GGUF quantizations of [`ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth), +produced via [Unsloth](https://github.com/unslothai/unsloth) + llama.cpp's conversion scripts. + +| Field | Value | +|---|---| +| **Source checkpoint** | [`ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth) | +| **Base model** | [`LiquidAI/LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | +| **Dataset** | [`Salesforce/xlam-function-calling-60k`](https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k) | +| **Training** | 1 full epoch (7,500 steps, effective batch size 8) | +| **Conversion** | Unsloth `save_pretrained_gguf` → llama.cpp GGUF | +| **Quantization tool** | llama.cpp `llama-quantize` | + +## Available quantizations + +| File | Size | Notes | +|---|---|---| +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q2_K.gguf` | smallest (~483 MB) | 2-bit; extreme compression, quality loss | +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q3_K_M.gguf` | small (~600 MB) | 3-bit; modest quality trade-off | +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf` | recommended (~731 MB) | 4-bit; best size/quality balance | +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q5_K_M.gguf` | balanced (~843 MB) | 5-bit; near-full quality | +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q6_K.gguf` | high quality (~963 MB) | 6-bit; minimal degradation | +| `LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q8_0.gguf` | largest (~1.25 GB) | 8-bit; closest to bf16 source | + +**Recommended default:** `Q4_K_M` (4-bit, K-quant medium). For memory-constrained deployment, try `Q2_K` or `Q3_K_M`. For maximum fidelity, use `Q8_0`. 
+ +## Usage + +### llama.cpp + +```bash +# Single prompt +llama-cli -hf ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF --jinja -p "Find flights from SFO to NYC on December 25th" -n 256 + +# Interactive chat +llama-cli -hf ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF --jinja -cnv +``` + +### Ollama + +```bash +ollama run hf.co/ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF:Q4_K_M +``` + +### llama-cpp-python + +```python +from llama_cpp import Llama +llm = Llama.from_pretrained( + repo_id="ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF", + filename="LFM2.5-1.2B-Function-Calling-xLAM-Unsloth.Q4_K_M.gguf", + n_ctx=2048, +) +out = llm.create_chat_completion( + messages=[{"role": "user", "content": "Find flights from SFO to NYC on December 25th"}], + max_tokens=256, +) +print(out["choices"][0]["message"]["content"]) +``` + +## Intended use + +For research and non-commercial experimentation only. Outputs should be independently verified before any downstream use. + +## Limitations + +- GGUF quantizations have unavoidable quality loss relative to the source bfloat16 checkpoint. Use `Q5_K_M`, `Q6_K`, or `Q8_0` for best fidelity. +- Inherits all limitations of the source merged checkpoint ([`ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth)). +- Fine-tuned only on the function schemas covered by the ~60k-example training dataset; performance on novel APIs may degrade. + +## Citation + +```bibtex +@misc{lfm25_12b_xlam_unsloth_2026_gguf, + author = {Ermia Azarkhalili}, + title = {LFM2.5-1.2B-xLAM-Unsloth — GGUF quantized}, + year = {2026}, + publisher = {Hugging Face}, + howpublished = {\url{https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth-GGUF}} +} +``` + +--- + +This lfm2 model was trained 2× faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library. 
+ +[Unsloth on GitHub](https://github.com/unslothai/unsloth) diff --git a/config.json b/config.json new file mode 100644 index 0000000..a37b01e --- /dev/null +++ b/config.json @@ -0,0 +1,62 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 2048, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 2048, + "conv_use_xavier_init": true, + "dtype": "bfloat16", + "eos_token_id": 7, + "full_attn_idxs": null, + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_name": "ermiaazarkhalili/LFM2.5-1.2B-Function-Calling-xLAM-Unsloth", + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 32, + "num_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "tie_word_embeddings": true, + "transformers_version": "5.5.0", + "unsloth_version": "2026.4.6", + "use_cache": false, + "use_pos_enc": true, + "vocab_size": 65536 +}