From 9c60ea8806e571355289f648233a07a7c0fd9105 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 30 Apr 2026 18:40:46 +0800 Subject: [PATCH] Initialize project; model provided by the ModelHub XC community MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: abhinav0231/Lily-1.5b-v0.1-GGUF Source: Original Platform --- .gitattributes | 39 +++++++ Lily-1.5b-v0.1-F16.gguf | 3 + Lily-1.5b-v0.1-Q4_K_M.gguf | 3 + Lily-1.5b-v0.1-Q5_K_M.gguf | 3 + Lily-1.5b-v0.1-Q8_0.gguf | 3 + README.md | 226 +++++++++++++++++++++++++++++++++++++ 6 files changed, 277 insertions(+) create mode 100644 .gitattributes create mode 100644 Lily-1.5b-v0.1-F16.gguf create mode 100644 Lily-1.5b-v0.1-Q4_K_M.gguf create mode 100644 Lily-1.5b-v0.1-Q5_K_M.gguf create mode 100644 Lily-1.5b-v0.1-Q8_0.gguf create mode 100644 README.md diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..12e0b21 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs
diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Lily-1.5b-v0.1-F16.gguf filter=lfs diff=lfs merge=lfs -text +Lily-1.5b-v0.1-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Lily-1.5b-v0.1-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +Lily-1.5b-v0.1-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Lily-1.5b-v0.1-F16.gguf b/Lily-1.5b-v0.1-F16.gguf new file mode 100644 index 0000000..d2cd61c --- /dev/null +++ b/Lily-1.5b-v0.1-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0ce3bee9e56031dc5c61cda98d06dd733645bb0792f63f577fd393d3eea9e1 +size 3093670528 diff --git a/Lily-1.5b-v0.1-Q4_K_M.gguf b/Lily-1.5b-v0.1-Q4_K_M.gguf new file mode 100644 index 0000000..0504a9e --- /dev/null +++ b/Lily-1.5b-v0.1-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f9e4c0c50ebeaaca91bfd354de64791520539ce02a8646a0f1a7b2ab510514 +size 986049664 diff --git a/Lily-1.5b-v0.1-Q5_K_M.gguf b/Lily-1.5b-v0.1-Q5_K_M.gguf new file mode 100644 index 0000000..f968990 --- /dev/null +++ b/Lily-1.5b-v0.1-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22fd0d7e83b90bdd912d17cc5e7fdfe2c9ffba5eb3e8a15e76f0da2cfc1a5008 +size 1125051520 diff --git a/Lily-1.5b-v0.1-Q8_0.gguf b/Lily-1.5b-v0.1-Q8_0.gguf new file mode 100644 index 0000000..d6c8b18 --- /dev/null +++ b/Lily-1.5b-v0.1-Q8_0.gguf @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4475b161427fd51042f0799fb3ef521f754bf82ec95419706c5ea88243e206ee +size 1646574208 diff --git a/README.md b/README.md new file mode 100644 index 0000000..8817304 --- /dev/null +++ b/README.md @@ -0,0 +1,226 @@ +--- +license: apache-2.0 +base_model: Qwen/Qwen2.5-1.5B-Instruct +tags: + - qwen2.5 + - chain-of-thought + - reasoning + - fine-tuned + - gguf +language: + - en +pipeline_tag: text-generation +--- + +# Lily 1.5B — v0.1 + +Lily is a fine-tuned 1.5B-parameter language model built on [Qwen 2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct). It is trained to reason explicitly before answering — every response includes a visible thinking step inside `<think>` tags, followed by the final answer inside `<answer>` tags. + +The model is optimized for precision and structured output. It stays direct, avoids filler phrases, and scales response depth to the complexity of the question. + +--- + +## Model Details + +| Property | Value | +|---|---| +| **Base model** | Qwen/Qwen2.5-1.5B-Instruct | +| **Parameters** | 1.5B | +| **Context length** | 4096 tokens | +| **Fine-tuning** | Supervised fine-tuning (SFT) on chain-of-thought formatted data | +| **Output format** | `<think>...</think>` reasoning + `<answer>...</answer>` final response | +| **License** | Apache 2.0 | + +--- + +## Output Format + +Every response from Lily follows this structure: + +``` +<think> +[Step-by-step reasoning, working through the problem before committing to an answer] +</think> +<answer> +[Final response — structured, precise, and direct] +</answer> +``` + +The `<think>` block is Lily's scratchpad — it plans, evaluates, and drafts before producing the answer. This makes the model's reasoning transparent and auditable. 
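Because the reasoning is delimited by tags, downstream code can split the scratchpad from the final answer. A minimal sketch, assuming the `<think>`/`<answer>` delimiters described in this card (`split_response` is an illustrative helper, not part of any library):

```python
import re

def split_response(text: str):
    """Split a Lily-style response into (reasoning, answer).

    Returns (None, text) if either tag pair is missing, so callers
    can fall back gracefully on malformed output.
    """
    think = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    answer = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if not (think and answer):
        return None, text
    return think.group(1).strip(), answer.group(1).strip()

sample = "<think>2 + 2 = 4.</think>\n<answer>4</answer>"
reasoning, answer = split_response(sample)
```

This keeps the audit trail available while letting an application show only the `<answer>` portion to end users.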
+ +--- + +## Quick Start + +### Transformers (Python) + +```python +from transformers import AutoTokenizer, AutoModelForCausalLM +import torch + +model_id = "abhinav0231/Lily-1.5b-v0.1" + +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.float16, + device_map="auto", +) + +SYSTEM_PROMPT = ( + "You are Lily, a precise and thoughtful AI assistant.\n\n" + "Always reason step by step inside <think> tags, " + "then write your final answer inside <answer> tags.\n\n" + "When answering:\n" + "- Be thorough: cover all relevant aspects, not just the surface question\n" + "- Be specific: use exact values, names, and examples rather than vague generalities\n" + "- Structure long responses with markdown headers, code blocks, and lists where appropriate\n" + "- Lead with the most important information first\n" + "- Match the depth of your answer to the complexity of the question\n\n" + "Tone: direct and confident. Never use filler phrases like \"Certainly!\", " + "\"Great question!\", or \"Of course!\". Be helpful without being sycophantic." 
+) + +def ask(question, max_new_tokens=512): + messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": question}, + ] + prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + inputs = tokenizer(prompt, return_tensors="pt").to(model.device) + with torch.no_grad(): + output = model.generate( + **inputs, + max_new_tokens=max_new_tokens, + temperature=0.7, + top_p=0.9, + do_sample=True, + pad_token_id=tokenizer.eos_token_id, + ) + response = tokenizer.decode( + output[0][inputs["input_ids"].shape[-1]:], + skip_special_tokens=True, + ) + return response + +print(ask("What is the difference between a list and a tuple in Python?")) +``` + +--- + +## GGUF (llama.cpp / Ollama / LM Studio) + +Quantized GGUF versions are available at [abhinav0231/Lily-1.5b-v0.1-GGUF](https://huggingface.co/abhinav0231/Lily-1.5b-v0.1-GGUF). + +| Quant | Size | Use case | +|---|---|---| +| `Q4_K_M` | ~1.0 GB | Best balance of speed and quality for CPU inference | +| `Q5_K_M` | ~1.2 GB | Better quality, still fast on CPU | +| `Q8_0` | ~1.6 GB | Near-lossless, recommended if VRAM/RAM allows | +| `F16` | ~3.1 GB | Full precision, GPU only | + +### llama.cpp + +```bash +# Download a quant +huggingface-cli download abhinav0231/Lily-1.5b-v0.1-GGUF \ + Lily-1.5b-v0.1-Q4_K_M.gguf \ + --local-dir ./ + +# Run the server +./llama.cpp/build/bin/llama-server \ + -m Lily-1.5b-v0.1-Q4_K_M.gguf \ + --ctx-size 4096 \ + --port 8080 +``` + +### Ollama + +```bash +# Create a Modelfile (multiline SYSTEM strings use triple quotes) +cat > Modelfile << 'EOF' +FROM ./Lily-1.5b-v0.1-Q4_K_M.gguf +SYSTEM """You are Lily, a precise and thoughtful AI assistant. + +Always reason step by step inside <think> tags, then write your final answer inside <answer> tags. 
+ +When answering: +- Be thorough: cover all relevant aspects, not just the surface question +- Be specific: use exact values, names, and examples rather than vague generalities +- Structure long responses with markdown headers, code blocks, and lists where appropriate +- Lead with the most important information first +- Match the depth of your answer to the complexity of the question + +Tone: direct and confident. Never use filler phrases like "Certainly!", "Great question!", or "Of course!". Be helpful without being sycophantic.""" +EOF + +# Build and run +ollama create lily -f Modelfile +ollama run lily "Explain how transformers work" +``` + +--- + +## System Prompt + +The system prompt below is embedded in the model's chat template and applied automatically when using `apply_chat_template`. You do **not** need to set it manually if using the Transformers pipeline — it is already the default. + +The critical sentence that triggers the `<think>`/`<answer>` format — kept verbatim from training — is: + +> *Always reason step by step inside `<think>` tags, then write your final answer inside `<answer>` tags.* + +The rest of the system prompt shapes tone and response quality and can be overridden by passing a custom `system` message. + +--- + +## Intended Use + +Lily is a general-purpose assistant fine-tune. It performs well on: + +- Reasoning and logic problems +- Code explanation and generation +- Structured question answering +- Step-by-step problem solving + +The explicit `<think>` step makes it especially useful in applications where reasoning transparency matters — grading, debugging, tutoring, or any workflow where you need to see *why* the model gave a particular answer, not just the answer itself. 
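As noted in the System Prompt section, a custom `system` message only needs to preserve the trigger sentence. A minimal sketch of such an override (the `TRIGGER` constant and `build_messages` helper are illustrative names, not part of the model's API):

```python
# Illustrative only: compose a custom system message for Lily that keeps
# the sentence triggering the <think>/<answer> output format.
TRIGGER = (
    "Always reason step by step inside <think> tags, "
    "then write your final answer inside <answer> tags."
)

def build_messages(question: str, extra_style: str = "") -> list:
    """Build a chat message list with a custom system prompt.

    The trigger sentence stays first; any extra tone or style
    guidance is appended after it.
    """
    system = TRIGGER + ("\n\n" + extra_style if extra_style else "")
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": question},
    ]

messages = build_messages(
    "Explain big-O notation.",
    "Answer in at most three sentences.",
)
```

The resulting list can be passed directly to `tokenizer.apply_chat_template` as in the Quick Start example.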
+ +--- + +## Limitations + +- **1.5B parameters**: Not suited for tasks requiring broad world knowledge or long multi-document context +- **v0.1**: Early release — output quality and format consistency will improve in future versions +- **English primary**: Training data is predominantly English; multilingual performance is limited +- **No tool use / function calling**: This version does not support structured tool call outputs + +--- + +## Training + +Fine-tuned from `Qwen/Qwen2.5-1.5B-Instruct` via supervised fine-tuning on a dataset of chain-of-thought formatted examples. Each training example uses the `<think>`/`<answer>` output structure. Training was performed on a single T4 GPU via Google Colab. + +--- + +## Citation + +If you use Lily in research or a project, please cite: + +``` +@misc{lily-1.5b-v0.1, + author = {abhinav0231}, + title = {Lily 1.5B v0.1: A chain-of-thought fine-tune of Qwen 2.5 1.5B}, + year = {2025}, + publisher = {Hugging Face}, + url = {https://huggingface.co/abhinav0231/Lily-1.5b-v0.1} +} +``` + +--- + +## License + +Apache 2.0 — see [LICENSE](https://www.apache.org/licenses/LICENSE-2.0).