commit 9bc5b02b90f6d9679a1126c5d0656fcb1fe9c828 Author: ModelHub XC Date: Thu Apr 30 19:15:31 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..123ebaa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,44 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q2_K.gguf filter=lfs diff=lfs 
merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +LFM2.5-1.2B-Instruct.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LFM2.5-1.2B-Instruct.Q4_K_M.gguf b/LFM2.5-1.2B-Instruct.Q4_K_M.gguf new file mode 100644 index 0000000..eca8414 --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:235b75a0ba70196d99602801be01dbdad601ffe4bd415b9d72b2378bbbfe6b1d +size 730893472 diff --git a/LFM2.5-1.2B-Instruct.Q5_K_M.gguf b/LFM2.5-1.2B-Instruct.Q5_K_M.gguf new file mode 100644 index 0000000..8da9a44 --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e994d7c8ce384c4ae8902a33f03fed03bf53ff2389c0f516530c5cf78d3c398 +size 843353248 diff --git a/LFM2.5-1.2B-Instruct.Q8_0.gguf b/LFM2.5-1.2B-Instruct.Q8_0.gguf new file mode 100644 index 0000000..983ea88 --- /dev/null +++ b/LFM2.5-1.2B-Instruct.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c474f6d9a0b9dc572777a738dfc2941c3ee6f7ad99a25eff22292e4384e1d5b9 +size 1246252192 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q2_K.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q2_K.gguf new file mode 100644 index 0000000..967c400 --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ef5e2a505c5d86952672560f81edf0da9a19cb38d7b6d1c2b96b11090b3f3b1b +size 483397440 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q3_K_M.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q3_K_M.gguf new file mode 100644 index 0000000..2ee0f3e --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c91465b85f0d01816d926acc59e5bdda0c819e17208171c7e8aa89f8630e1b82 +size 600346432 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q4_K_M.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q4_K_M.gguf new file mode 100644 index 0000000..2828b15 --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabbdeeff4eb68e08965d40791b25def4de004d60e19e29eac27fb16b77ed695 +size 730894144 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q5_K_M.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q5_K_M.gguf new file mode 100644 index 0000000..225126a --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a00f4398060bdba8dc86ad3f4251182d49cf8efcbd859ba91c12ea40cd2b92d +size 843353920 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q6_K.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q6_K.gguf new file mode 100644 index 0000000..a404b00 --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6304d21b658691f10f24960e347fb63f086abeb55fbe9ccbf118f79857072c8 +size 962842432 diff --git a/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q8_0.gguf b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q8_0.gguf new file mode 100644 index 0000000..60f4392 --- /dev/null +++ b/LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ffe6c1a4d68b5aeff0274a30294c28a7a65dd43c1f34ab9602b34a0d8160959e +size 1246252864 diff --git a/README.md b/README.md new file mode 100644 index 0000000..c8a34cb --- /dev/null +++ b/README.md @@ -0,0 +1,105 @@ +--- +base_model: ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth +tags: +- gguf +- llama.cpp +- unsloth +- lfm2 +- reasoning +- quantized +license: apache-2.0 +language: +- en +datasets: +- ermiaazarkhalili/Claude-Opus-4.7-Reasoning +pipeline_tag: text-generation +--- + +# LFM2.5-1.2B-SFT-Unsloth — GGUF quantized + +GGUF quantizations of [`ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth), +produced via [Unsloth](https://github.com/unslothai/unsloth) + llama.cpp's conversion scripts. + +| Field | Value | +|---|---| +| **Source checkpoint** | [`ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth) | +| **Base model** | [`LiquidAI/LFM2.5-1.2B-Instruct`](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct) | +| **Dataset** | [`ermiaazarkhalili/Claude-Opus-4.7-Reasoning`](https://huggingface.co/datasets/ermiaazarkhalili/Claude-Opus-4.7-Reasoning) | +| **Training** | 1 full epoch (effective batch size = 8) | +| **Conversion** | Unsloth `save_pretrained_gguf` → llama.cpp GGUF | +| **Quantization tool** | llama.cpp `llama-quantize` | +
+## Available quantizations + +| File | Size | Notes | +|---|---|---| +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q2_K.gguf` | smallest | 2-bit; extreme compression, quality loss | +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q3_K_M.gguf` | small | 3-bit; modest quality trade-off | +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q4_K_M.gguf` | recommended | 4-bit; best size/quality balance | +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q5_K_M.gguf` | balanced | 5-bit; near-full quality 
| +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q6_K.gguf` | high quality | 6-bit; minimal degradation | +| `LFM2.5-1.2B-SFT-Claude-Reasoning-Unsloth-Q8_0.gguf` | largest | 8-bit; closest to bf16 source | + +**Recommended default:** `Q4_K_M` (4-bit, K-quant medium). For memory-constrained deployment, try `Q2_K` or `Q3_K_M`. For maximum fidelity, use `Q8_0`. + +## Usage + +### llama.cpp + +```bash +# Text-only +llama-cli -hf ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF --jinja -p "Explain step-by-step: if a train travels 60 mph for 2.5 hours, how far does it go?" -n 256 + +# Interactive chat +llama-cli -hf ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF --jinja -cnv +``` + +### Ollama + +```bash +ollama run hf.co/ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF:Q4_K_M +``` + +### llama-cpp-python + +```python +from llama_cpp import Llama +llm = Llama.from_pretrained( + repo_id="ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF", + filename="*Q4_K_M.gguf", + n_ctx=2048, +) +out = llm.create_chat_completion( + messages=[{"role": "user", "content": "Explain step-by-step: if a train travels 60 mph for 2.5 hours, how far does it go?"}], + max_tokens=256, +) +print(out["choices"][0]["message"]["content"]) +``` + +## Intended use + +For research and non-commercial experimentation only. Outputs should be independently verified before any downstream use. + +## Limitations + +- GGUF quantizations have unavoidable quality loss relative to the source bfloat16 checkpoint. Use `Q5_K_M` or `Q8_0` for best fidelity. +- Inherits all limitations of the source merged checkpoint ([`ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth`](https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth)). +- Distilled reasoning traces reflect patterns from Claude Opus 4.7 and may not generalize to domains outside the distillation corpus. 
+ +## Citation + +```bibtex +@misc{ lfm25_12b_sft_claude_opus_2026_gguf , + author = {Ermia Azarkhalili}, + title = { LFM2.5-1.2B-SFT-Unsloth — GGUF quantized }, + year = {2026}, + publisher = {Hugging Face}, + howpublished = {\url{https://huggingface.co/ermiaazarkhalili/LFM2.5-1.2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF}} +} +``` + +--- + +This lfm2 model was trained 2× faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library. + +[](https://github.com/unslothai/unsloth) diff --git a/config.json b/config.json new file mode 100644 index 0000000..c153b53 --- /dev/null +++ b/config.json @@ -0,0 +1,61 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 2048, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 2048, + "conv_use_xavier_init": true, + "torch_dtype": "bfloat16", + "eos_token_id": 7, + "full_attn_idxs": null, + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_name": "LiquidAI/LFM2.5-1.2B-Instruct", + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 32, + "num_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "tie_word_embeddings": true, + "unsloth_version": "2026.4.6", + "use_cache": false, + "use_pos_enc": true, + "vocab_size": 65536 +} \ No newline at end of file