From 1f72039b0e7bbb24e4b74bca61a69a69cf0f39aa Mon Sep 17 00:00:00 2001
From: ModelHub XC <noreply@modelhub.org.cn>
Date: Wed, 24 Jun 2026 11:18:20 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
 =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: EphAsad/Atem-0.6B
Source: Original Platform
---
 .gitattributes         |  43 +++++
 Atem-0.6b.Q4_K_M.gguf  |   3 +
 Atem-0.6b.Q5_K_M.gguf  |   3 +
 Atem-0.6b.Q8_0.gguf    |   3 +
 Logo.png               |   3 +
 Modelfile              |  59 +++++++
 README.md              | 358 +++++++++++++++++++++++++++++++++++++++++
 chat_template.jinja    | 101 ++++++++++++
 config.json            |  64 ++++++++
 generation_config.json |  13 ++
 model.safetensors      |   3 +
 tokenizer.json         |   3 +
 tokenizer_config.json  | 234 +++++++++++++++++++++++++++
 13 files changed, 890 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 Atem-0.6b.Q4_K_M.gguf
 create mode 100644 Atem-0.6b.Q5_K_M.gguf
 create mode 100644 Atem-0.6b.Q8_0.gguf
 create mode 100644 Logo.png
 create mode 100644 Modelfile
 create mode 100644 README.md
 create mode 100644 chat_template.jinja
 create mode 100644 config.json
 create mode 100644 generation_config.json
 create mode 100644 model.safetensors
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer_config.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e595e95
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,43 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6b.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6b.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Atem-0.6b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Atem-0.6b.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Atem-0.6b.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+Logo.png filter=lfs diff=lfs merge=lfs -text
diff --git a/Atem-0.6b.Q4_K_M.gguf b/Atem-0.6b.Q4_K_M.gguf
new file mode 100644
index 0000000..1c88960
--- /dev/null
+++ b/Atem-0.6b.Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddef2fb68fbf63a4ac00716aad48647f5da3249513b771cb8a32a12c5b9202e1
+size 396705696
diff --git a/Atem-0.6b.Q5_K_M.gguf b/Atem-0.6b.Q5_K_M.gguf
new file mode 100644
index 0000000..5483c58
--- /dev/null
+++ b/Atem-0.6b.Q5_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dae12e3543d08aa81ab2244d54226b4a6afc713682a0ced185b2474a9dd9354
+size 444415904
diff --git a/Atem-0.6b.Q8_0.gguf b/Atem-0.6b.Q8_0.gguf
new file mode 100644
index 0000000..acb8410
--- /dev/null
+++ b/Atem-0.6b.Q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75c17322b60c7e36d17149967844fe53881f9720e7125a8a2a58048c84ab8221
+size 639447968
diff --git a/Logo.png b/Logo.png
new file mode 100644
index 0000000..2f31b1d
--- /dev/null
+++ b/Logo.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:036d268ac79d1a5355a5ea602fc02ba312f4938c3506ddfa148ec7de44b69607
+size 981796
diff --git a/Modelfile b/Modelfile
new file mode 100644
index 0000000..b4da25b
--- /dev/null
+++ b/Modelfile
@@ -0,0 +1,59 @@
+# Base weights: the 8-bit GGUF shipped alongside this Modelfile in the repository.
+FROM Atem-0.6b.Q8_0.gguf
+TEMPLATE """{{- if .Messages }}
+{{- if or .System .Tools }}<|im_start|>system
+{{- if .System }}
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{{- range .Tools }}
+{"type": "function", "function": {{ .Function }}}
+{{- end }}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+{{- end }}<|im_end|>
+{{ end }}
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 -}}
+{{- if eq .Role "user" }}<|im_start|>user
+{{ .Content }}<|im_end|>
+{{ else if eq .Role "assistant" }}<|im_start|>assistant
+{{ if .Content }}{{ .Content }}
+{{- else if .ToolCalls }}<tool_call>
+{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{ end }}</tool_call>
+{{- end }}{{ if not $last }}<|im_end|>
+{{ end }}
+{{- else if eq .Role "tool" }}<|im_start|>user
+<tool_response>
+{{ .Content }}
+</tool_response><|im_end|>
+{{ end }}
+{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
+{{ end }}
+{{- end }}
+{{- else }}
+{{- if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
+PARAMETER stop "<|im_end|>"
+PARAMETER stop "<|im_start|>"
+PARAMETER temperature 0.6
+PARAMETER min_p 0.0
+PARAMETER top_k 20
+PARAMETER top_p 0.95
+PARAMETER repeat_penalty 1
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..95ab122
--- /dev/null
+++ b/README.md
@@ -0,0 +1,358 @@
+---
+license: apache-2.0
+base_model: Qwen/Qwen3-0.6B
+tags:
+- unsloth
+- lora
+- qwen3
+- reasoning
+- distillation
+- conversational
+datasets:
+- EphAsad/QWENMillenium-SF
+- EphAsad/Phi4Millennium-SF
+- EphAsad/MistralMillenium-SF
+- Modotte/CodeX-2M-Thinking
+- Jackrong/Kimi-K2.5-Reasoning-1M-Cleaned
+- WithinUsAI/MiniMax_M2.7_Distilled_5k
+- tuanha1305/DeepSeek-R1-Distill
+- open-r1/OpenThoughts-114k-math
+- flytech/python-codes-25k
+- FreedomIntelligence/medical-o1-reasoning-SFT
+- Jackrong/Claude-opus-4.7-TraceInversion-5000x
+language:
+- en
+pipeline_tag: text-generation
+library_name: transformers
+---
+
+![Atem Logo](https://huggingface.co/EphAsad/Atem-0.6B/resolve/main/Logo.png)
+
+# Atem-0.6B
+
+*Ancient logic. Modern intelligence.*
+
+A 0.6B reasoning model trained via multi-source knowledge distillation from frontier teacher models.
+
+![Base Model](https://img.shields.io/badge/Base-Qwen3--0.6B-blue)![Method](https://img.shields.io/badge/Method-LoRA%20SFT-purple)![Parameters](https://img.shields.io/badge/Parameters-0.6B-orange)![License](https://img.shields.io/badge/License-Apache%202.0-green)
+
+---
+
+## Overview
+
+Atem-0.6B is a 0.6B parameter reasoning model built via supervised fine-tuning on a curated corpus of approximately 120,000 examples distilled from multiple frontier teacher models. Starting from Qwen/Qwen3-0.6B, Atem was trained using LoRA to preserve base model capabilities while shifting output style toward clean, directly-formatted final answers.
+
+This is **Stage 1** of a planned multi-stage training series, and the first entry in the Atem family built on Qwen3 rather than Qwen2.5. Stage 1 strips `<think>` reasoning traces from all training data, deliberately suppressing Qwen3's native exposed chain-of-thought in favor of direct answers. **Stage 2 (Atem-Savant-0.6B) is currently in progress**, layering curated chain-of-thought traces back on top of this foundation — see [Known Limitations](#known-limitations) for why that stage matters.
+
+---
+
+## Model Details
+
+| Property                 | Value                                   |
+| ------------------------ | ---------------------------------------- |
+| **Base model**           | Qwen/Qwen3-0.6B                          |
+| **Training method**      | LoRA Supervised Fine-Tuning (Stage 1)    |
+| **LoRA config**          | r=32, alpha=64, dropout=0.05             |
+| **Target modules**       | q, k, v, o, gate, up, down projections   |
+| **Parameters**           | ~596M                                    |
+| **Trainable (LoRA) params** | 20,185,088 (3.28% of base + adapter parameters) |
+| **Training records**     | 120,017                                  |
+| **Epochs**                | 2                                        |
+| **Effective batch size** | 128 (batch 32 × grad accum 4)            |
+| **Learning rate**        | 2e-4, cosine schedule, 5% warmup         |
+| **Final train loss**     | 1.055                                    |
+| **Final val loss**       | 1.073                                    |
+| **Hardware**             | NVIDIA A100-SXM4 80GB                    |
+| **Max sequence length**  | 4,096 tokens                             |
+| **Precision**            | bfloat16                                 |
+| **License**              | Apache 2.0                               |
+
+---
+
+## Intended Use
+
+Atem-0.6B is designed for lightweight, open-ended reasoning tasks where structured, direct answers add value at low compute cost:
+
+- Code explanation, implementation, and debugging
+- Mathematical problem solving with working shown
+- Analytical reasoning and hypothesis evaluation
+- Concept explanation and comparative analysis
+- Logic, argument, and fallacy identification
+
+Atem-0.6B is **not** designed for retrieval-heavy factual lookup, real-time information, or tasks requiring broad knowledge breadth beyond its training domains. At 0.6B parameters its capability ceiling is naturally lower than larger Atem models — expect it to be most useful where speed and footprint matter more than depth on hard, multi-step problems.
+
+---
+
+## Training Data
+
+Atem-0.6B was trained on a corpus assembled from eleven sources, combining domain-specific generated datasets and publicly available distillation datasets from frontier models. All outputs containing `<think>` reasoning traces were stripped to clean final responses for Stage 1 training.
+
+| Dataset                                      | Records      | Source / Teacher                                    |
+| --------------------------------------------- | ------------ | ----------------------------------------------------- |
+| EphAsad/QWENMillenium-SF                      | ~            | Qwen2.5-14B — Analytical & Scientific                  |
+| EphAsad/Phi4Millennium-SF                     | ~            | Phi-4 14B — Mathematical Reasoning                     |
+| EphAsad/MistralMillenium-SF                   | ~            | Mistral-Nemo-12B — Language & Comprehension            |
+| Modotte/CodeX-2M-Thinking                     | 40,000       | Mixed — Coding                                         |
+| Jackrong/Kimi-K2.5-Reasoning-1M-Cleaned       | 23,000       | Kimi K2.5 — General Distillation (English filtered)    |
+| WithinUsAI/MiniMax_M2.7_Distilled_5k          | 5,000        | MiniMax M2.7                                           |
+| tuanha1305/DeepSeek-R1-Distill                | 9,000        | DeepSeek-R1                                            |
+| open-r1/OpenThoughts-114k-math                | 10,000       | Mixed — Mathematics (correct answers only)             |
+| flytech/python-codes-25k                      | 10,000       | Python coding                                          |
+| FreedomIntelligence/medical-o1-reasoning-SFT  | 10,000       | Medical reasoning (English config)                     |
+| Jackrong/Claude-opus-4.7-TraceInversion-5000x | 5,000        | Claude Opus 4.7 — Trace Inversion                      |
+| **Total**                                     | **120,017**  |                                                         |
+
+---
+
+## Training Configuration
+
+```python
+# Key hyperparameters
+lora_r            = 32
+lora_alpha        = 64
+lora_dropout      = 0.05
+max_seq_length    = 4096
+learning_rate     = 2e-4
+lr_scheduler      = 'cosine'
+warmup_ratio      = 0.05
+batch_size        = 32
+grad_accumulation = 4        # effective batch size: 128
+num_epochs        = 2
+dtype             = bfloat16
+load_in_4bit      = True     # during training
+
+```
+
+Training used Unsloth with `train_on_responses_only` masking, ensuring loss was computed exclusively on assistant response tokens. Because Qwen3 ships with no default system prompt (unlike Qwen2.5-Instruct), Atem's identity is baked in via a chat-template modification that injects Atem as the default persona only when no explicit system message is supplied — explicit system messages still take priority. A pre-training validation suite verified this injection, confirmed the response-masking boundary correctly accounts for Qwen3's automatic empty `<think></think>` scaffold insertion, and checked for leaked reasoning content before training began.
+
+After training, LoRA adapters were merged into the base weights and exported as a full merged model.
+
+**Loss curve:**
+
+| Step  | Train Loss | Val Loss  |
+| ----- | ---------- | --------- |
+| 200   | 1.166      | 1.163     |
+| 800   | 1.108      | 1.096     |
+| 1400  | 0.983      | 1.077     |
+| Final (1876) | **1.055** | **1.073** |
+
+Validation loss plateaued around step 1600 of 1876 total steps — the final ~15% of training produced only marginal further improvement (1.074 → 1.073). Train loss showed some batch-to-batch volatility late in training (a step-1800 spike to 1.088, consistent with the dataset's domain diversity rather than divergence), but validation loss stayed smooth and never reversed, indicating no overfitting across the two epochs.
+
+---
+
+## Evaluation
+
+### Benchmark Results
+
+Evaluated against the base model (`unsloth/qwen3-0.6b-unsloth-bnb-4bit`) using lm-evaluation-harness.
+
+| Task                    | Base (Qwen3-0.6B) | Atem-0.6B | Delta       |
+| ------------------------ | ------------------ | ---------- | ----------- |
+| ARC-Challenge (0-shot, acc_norm) | 33.0%       | 35.0%      | +2.0% ✓     |
+| GSM8K (5-shot, strict-match)     | 26.7%       | **31.8%**  | **+5.1%** ✓ |
+| HellaSwag (0-shot, acc_norm)     | 45.3%       | 45.8%      | +0.5%       |
+
+**Eval condition note:** the base model was loaded in 4-bit (`unsloth/qwen3-0.6b-unsloth-bnb-4bit`); Atem-0.6B was evaluated as the full bfloat16 merged model. This is not a precision-matched comparison — the gap may be modestly inflated relative to a 4-bit-vs-4-bit or bf16-vs-bf16 run. GSM8K used 5-shot prompting per lm-eval's default config; ARC-Challenge and HellaSwag were 0-shot.
+
+The GSM8K gain is the standout figure, but it likely reflects Stage 1's training toward clean, directly-formatted final answers — which matters a great deal for lm-eval's exact-match-on-extracted-number scoring — more than a deeper improvement in multi-step mathematical reasoning. The qualitative evaluation below, which looks at harder, less templated problems, supports this reading: reasoning depth on multi-step problems is not uniformly better than the base model. ARC-Challenge and HellaSwag, which probe general/commonsense knowledge rather than output formatting, moved only slightly — expected, since SFT on this corpus isn't designed to add new general knowledge.
+
+### Qualitative Evaluation
+
+Atem-0.6B was evaluated against base Qwen3-0.6B (default thinking-enabled) across 30 domain-representative questions with matched system prompts.
+
+| Domain               | Questions | Outcome                                                                                  |
+| --------------------- | --------- | ----------------------------------------------------------------------------------------- |
+| Coding                | 8         | Mixed — comparable correctness; Atem notably more concise and direct                      |
+| Mathematics           | 6         | Mixed — base model's exposed reasoning self-corrects mid-generation on some multi-step problems that Atem commits to an error on |
+| Analytical Reasoning  | 6         | Base model edges ahead — exposed reasoning gives more room to work through multi-step arguments |
+| General Knowledge     | 5         | Comparable                                                                                 |
+| Language & Logic      | 5         | Comparable, slight edge to base on illustrative examples                                  |
+
+Atem-0.6B's outputs were consistently more concise and directly formatted — a direct result of Stage 1's design goal of suppressing exposed chain-of-thought. This did **not** translate into a uniform quality advantage over the base model: on problems requiring several sequential reasoning steps, the base model's visible thinking trace sometimes catches and corrects mistakes mid-generation that Atem, having no scratchpad, does not. This is the expected cost of the no-think format rather than a knowledge regression, and is the explicit target of the in-progress Stage 2 (Atem-Savant-0.6B) training, which reintroduces chain-of-thought.
+
+One output during qualitative testing showed repetitive/degenerate text (a duplicated bullet list) on a single open-ended analytical question — noted here for transparency rather than treated as representative.
+
+---
+
+## Usage
+
+### Transformers
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
+model_name = "EphAsad/Atem-0.6B"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": "Write a Python function that checks whether a number is prime."
+    }
+]
+
+inputs = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt"
+).to(model.device)
+
+with torch.no_grad():
+    output = model.generate(
+        input_ids=inputs,
+        max_new_tokens=1000,
+        temperature=0.7,
+        top_p=0.9,
+        repetition_penalty=1.1,
+        do_sample=True,
+    )
+
+response = tokenizer.decode(
+    output[0][inputs.shape[1]:],
+    skip_special_tokens=True
+)
+print(response)
+
+```
+
+### Unsloth (faster inference)
+
+```python
+from unsloth import FastLanguageModel
+import torch
+
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="EphAsad/Atem-0.6B",
+    max_seq_length=4096,
+    dtype=torch.bfloat16,
+    load_in_4bit=True,
+)
+FastLanguageModel.for_inference(model)
+
+messages = [
+    {
+        "role": "user",
+        "content": "Explain the difference between a stack and a queue, with examples."
+    }
+]
+
+inputs = tokenizer.apply_chat_template(
+    messages,
+    tokenize=True,
+    add_generation_prompt=True,
+    return_tensors="pt"
+).to("cuda")
+
+with torch.no_grad():
+    output = model.generate(
+        input_ids=inputs,
+        max_new_tokens=1000,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+    )
+
+print(tokenizer.decode(
+    output[0][inputs.shape[1]:],
+    skip_special_tokens=True
+))
+
+```
+
+### Ollama
+
+```bash
+# Recommended — best speed/quality balance
+ollama run hf.co/EphAsad/Atem-0.6B:Q4_K_M
+
+# Higher quality
+ollama run hf.co/EphAsad/Atem-0.6B:Q5_K_M
+
+# Near-lossless
+ollama run hf.co/EphAsad/Atem-0.6B:Q8_0
+
+```
+
+### llama.cpp
+
+```bash
+llama-server -hf EphAsad/Atem-0.6B:Q4_K_M
+
+```
+
+### System Prompt
+
+Atem-0.6B's identity is baked into the chat template and activates automatically when no system message is provided. For manual override:
+
+```
+You are Atem, a precise and analytical reasoning assistant. You approach
+every problem methodically — identifying core concepts, reasoning step by
+step, and arriving at well-supported conclusions. You show your thinking
+clearly and are thorough, direct, and intellectually honest.
+
+```
+
+### Available Files
+
+| File                       | Size       | Description                       |
+| --------------------------- | ---------- | ----------------------------------- |
+| `model.safetensors`         | ~1.2 GB    | Full bfloat16 merged weights        |
+| `Atem-0.6b.Q4_K_M.gguf`     | ~397 MB    | 4-bit quantised — recommended       |
+| `Atem-0.6b.Q5_K_M.gguf`     | ~444 MB    | 5-bit quantised                     |
+| `Atem-0.6b.Q8_0.gguf`       | ~639 MB    | 8-bit quantised — near-lossless     |
+
+---
+
+## Known Limitations
+
+**No thinking traces (Stage 1 by design).** Think tags were stripped from all training data for Stage 1, and Qwen3's native exposed reasoning is suppressed. The model does not produce extended `<think>` content. As shown in the qualitative evaluation above, this measurably costs accuracy on multi-step analytical and mathematical problems relative to the base model's default thinking-enabled behavior — Stage 2 (Atem-Savant-0.6B, in progress) exists specifically to recover this.
+
+**Smaller capability ceiling than larger Atem models.** At 0.6B parameters, this is the smallest model in the Atem family. Treat it as a fast, low-footprint option rather than a reasoning-depth flagship.
+
+**Mathematical precision on complex problems.** On multi-step calculations, the model may make arithmetic or counting errors without a scratchpad to catch them — verified directly in qualitative testing (e.g., miscounting combinatorial outcomes). Answers to high-stakes mathematical problems should be independently verified.
+
+**Eval precision asymmetry.** The benchmark comparison above evaluated the base model in 4-bit and Atem-0.6B in bfloat16 — see the Evaluation section for details. A precision-matched re-run would give a cleaner comparison.
+
+---
+
+## Roadmap
+
+Atem-0.6B establishes the Stage 1 foundation for the Qwen3-based branch of the Atem family. Planned next steps:
+
+- **Stage 2 (in progress):** Atem-Savant-0.6B — LoRA SFT on curated chain-of-thought data (~90% think-trace records, ~10% no-think) using OpenR1-Math, Kimi-K2.5, DeepSeek-V4-Pro-Reasoning, OpenCodeReasoning, and trace-inversion datasets, to recover multi-step reasoning depth on top of Stage 1's direct-answer foundation
+- **Extended benchmarks:** MMLU, BBH, IFEval post-Stage 2
+- **Precision-matched re-benchmark:** re-run base vs Atem comparison under identical 4-bit (or identical bf16) conditions
+
+---
+
+## Citation
+
+```bibtex
+@misc{atem_06b_2026,
+  author       = {Asad, Zain},
+  title        = {Atem-0.6B: A 0.6B Direct-Reasoning Model via
+                  Stage 1 SFT on Qwen3},
+  year         = {2026},
+  publisher    = {HuggingFace},
+  howpublished = {\url{https://huggingface.co/EphAsad/Atem-0.6B}},
+}
+
+```
+
+---
+
+## License
+
+Released under the [Apache 2.0 License](https://www.apache.org/licenses/LICENSE-2.0), consistent with the base model Qwen/Qwen3-0.6B.
+
+---
+
+Built independently by Zain Asad - [EphAsad](https://huggingface.co/EphAsad)
\ No newline at end of file
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..293244b
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,101 @@
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\n' + 'You are Atem, a precise and analytical reasoning assistant. You approach every problem methodically — identifying core concepts, reasoning step by step, and arriving at well-supported conclusions. You show your thinking clearly and are thorough, direct, and intellectually honest.' + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for forward_message in messages %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- set message = messages[index] %}
+    {%- set current_content = message.content if message.content is defined and message.content is not none else '' %}
+    {%- set tool_start = '<tool_response>' %}
+    {%- set tool_start_length = tool_start|length %}
+    {%- set start_of_message = current_content[:tool_start_length] %}
+    {%- set tool_end = '</tool_response>' %}
+    {%- set tool_end_length = tool_end|length %}
+    {%- set start_pos = (current_content|length) - tool_end_length %}
+    {%- if start_pos < 0 %}
+        {%- set start_pos = 0 %}
+    {%- endif %}
+    {%- set end_of_message = current_content[start_pos:] %}
+    {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set m_content = message.content if message.content is defined and message.content is not none else '' %}
+        {%- set content = m_content %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in m_content %}
+                {%- set content = (m_content.split('</think>')|last).lstrip('\n') %}
+                {%- set reasoning_content = (m_content.split('</think>')|first).rstrip('\n') %}
+                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and (not reasoning_content.strip() == '')) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..ff8229a
--- /dev/null
+++ b/config.json
@@ -0,0 +1,64 @@
+{
+    "architectures": [
+        "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": null,
+    "torch_dtype": "bfloat16",
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 1024,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "layer_types": [
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention"
+    ],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 28,
+    "model_type": "qwen3",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 8,
+    "pad_token_id": 151669,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+        "rope_theta": 1000000,
+        "rope_type": "default"
+    },
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "unsloth_fixed": true,
+    "unsloth_version": "2026.5.5",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151936
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..a9abf9b
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,13 @@
+{
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "max_length": 40960,
+  "pad_token_id": 151669,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "5.5.0"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..9339765
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7f0e3ed85d13b43971c3d1ed20e42fea849c360b42613d914507bdbf16f8c98
+size 1192135096
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..7edcf72
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7430e9138b76e93fb6f93462394d236b411111aef53cb421ba97d2691040cca
+size 11423114
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..4ada1a1
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,234 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 40960,
+  "pad_token": "<|PAD_TOKEN|>",
+  "padding_side": "left",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": false
+    },
+    "151669": {
+      "content": "<|PAD_TOKEN|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  },
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\n' + 'You are Atem, a precise and analytical reasoning assistant. You approach every problem methodically — identifying core concepts, reasoning step by step, and arriving at well-supported conclusions. You show your thinking clearly and are thorough, direct, and intellectually honest.' 
+ '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for forward_message in messages %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- set message = messages[index] %}\n    {%- set current_content = message.content if message.content is defined and message.content is not none else '' %}\n    {%- set tool_start = '<tool_response>' %}\n    {%- set tool_start_length = tool_start|length %}\n    {%- set start_of_message = current_content[:tool_start_length] %}\n    {%- set tool_end = '</tool_response>' %}\n    {%- set tool_end_length = tool_end|length %}\n    {%- set start_pos = (current_content|length) - tool_end_length %}\n    {%- if start_pos < 0 %}\n        {%- set start_pos = 0 %}\n    {%- endif %}\n    {%- set end_of_message = current_content[start_pos:] %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and not(start_of_message == tool_start and end_of_message == tool_end) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set m_content = message.content if message.content is defined and message.content is not none else '' %}\n        {%- set content = m_content %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in m_content %}\n                {%- set content = (m_content.split('</think>')|last).lstrip('\\n') %}\n                {%- set reasoning_content = 
(m_content.split('</think>')|first).rstrip('\\n') %}\n                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and (not reasoning_content.strip() == '')) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n     
   {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}"
+}
\ No newline at end of file