初始化项目,由ModelHub XC社区提供模型
Model: YoAbriel/KodaLite-1.3B Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
243
README.md
Normal file
243
README.md
Normal file
@@ -0,0 +1,243 @@
|
||||
---
|
||||
language: en
|
||||
license: apache-2.0
|
||||
library_name: transformers
|
||||
pipeline_tag: text-generation
|
||||
tags:
|
||||
- text-generation
|
||||
- llama
|
||||
- from-scratch
|
||||
- jax
|
||||
model-index:
|
||||
- name: KodaLite-1.3B
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: HellaSwag (zero-shot)
|
||||
type: hellaswag
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.2565
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: ARC-Easy (zero-shot)
|
||||
type: ai2_arc
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.3279
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: ARC-Challenge (zero-shot)
|
||||
type: ai2_arc
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.2150
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: WinoGrande (zero-shot)
|
||||
type: winogrande
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.4957
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: PIQA (zero-shot)
|
||||
type: piqa
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.5892
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: BoolQ (zero-shot)
|
||||
type: boolq
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.4434
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: OpenBookQA (zero-shot)
|
||||
type: openbookqa
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.2500
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
name: LAMBADA (OpenAI, zero-shot)
|
||||
type: lambada_openai
|
||||
metrics:
|
||||
- type: accuracy
|
||||
value: 0.1822
|
||||
- type: perplexity
|
||||
value: 93.78
|
||||
---
|
||||
|
||||
# KodaLite-1.3B (Koda-v0.1)
|
||||
|
||||
A **1.27B** parameter LLaMA-style decoder-only language model, trained **entirely from scratch** on 2x NVIDIA L40S GPUs using JAX + Flax NNX, then converted to HuggingFace Transformers format.
|
||||
|
||||
> **TL;DR** — KodaLite reaches ~37% average accuracy on standard LLM benchmarks. It is **severely undertrained** (only 1.64B tokens vs 40B–3T for comparable models), which places it just below GPT-2-124M despite having 10× more parameters. A nice illustration of the **Chinchilla scaling law**: tokens matter more than parameters at this budget.
|
||||
|
||||
## Benchmark results (zero-shot, 8 standard tasks)
|
||||
|
||||
Evaluated against 8 comparable ~1B-parameter models on the same benchmarks (HellaSwag, ARC-E/C, WinoGrande, PIQA, BoolQ, OpenBookQA, LAMBADA-OpenAI).
|
||||
|
||||
| Rank | Model | Params | Train tokens | Avg accuracy |
|
||||
|---|---|---|---|---|
|
||||
| 1 | TinyLlama-1.1B | 1.10B | 3000B | **50.3%** |
|
||||
| 2 | Pythia-1.4B | 1.41B | 300B | **50.2%** |
|
||||
| 3 | GPT-2-XL | 1.56B | 40B | **49.4%** |
|
||||
| 4 | OPT-1.3B | 1.32B | 180B | **49.1%** |
|
||||
| 5 | Pythia-1B | 1.01B | 300B | **47.6%** |
|
||||
| 6 | GPT-2-large | 0.77B | 40B | **46.2%** |
|
||||
| 7 | GPT-2-medium | 0.35B | 40B | **44.2%** |
|
||||
| 8 | GPT-2-124M | 0.12B | 40B | **39.7%** |
|
||||
| **9** | **KodaLite-1.3B** | **1.27B** | **1.64B** | **36.8%** |
|
||||
|
||||
### Per-task breakdown
|
||||
|
||||
| Task | KodaLite-1.3B | GPT-2-124M | GPT-2-XL | Pythia-1.4B | TinyLlama-1.1B | Random |
|
||||
|---|---|---|---|---|---|---|
|
||||
| HellaSwag | 25.65 | 29.22 | 47.94 | 49.21 | 56.2 | 25.0 |
|
||||
| ARC-Easy | 32.79 | 38.30 | 50.80 | 51.73 | 43.9 | 25.0 |
|
||||
| ARC-Challenge | 21.50 | 22.70 | 28.16 | 29.01 | 30.0 | 25.0 |
|
||||
| WinoGrande | 49.57 | 49.49 | 51.93 | 52.88 | 52.2 | 50.0 |
|
||||
| PIQA | 58.92 | 62.24 | 70.89 | 71.22 | 72.1 | 50.0 |
|
||||
| BoolQ | 44.34 | 49.76 | 61.59 | 63.70 | 60.6 | 50.0 |
|
||||
| OpenBookQA | 25.00 | 26.40 | 34.20 | 33.40 | 37.2 | 25.0 |
|
||||
| LAMBADA (acc / ppl) | 18.22 / 93.8 | 30.84 / 17.5 | 50.79 / 6.4 | 61.03 / 3.8 | — | — |
|
||||
|
||||
## Why KodaLite scores below GPT-2-124M (despite being 10× bigger)
|
||||
|
||||
The **Chinchilla scaling law** (DeepMind, 2022) states that a model with N parameters needs approximately **20×N training tokens** to be well-trained:
|
||||
|
||||
| Model | Params | Chinchilla target (~20× params) | Actual tokens | Ratio |
|
||||
|---|---|---|---|---|
|
||||
| **KodaLite-1.3B** | 1.27B | ~25B | **1.64B** | **6.5 %** 🔴 |
|
||||
| GPT-2-XL | 1.5B | ~30B | 40B | 133 % |
|
||||
| Pythia-1.4B | 1.4B | ~28B | 300B | 1070 % |
|
||||
| TinyLlama-1.1B | 1.1B | ~22B | 3000B | 13600 % |
|
||||
|
||||
KodaLite has seen **only 6.5%** of what it would need to be competitive. A bigger but undertrained model scores lower than a smaller but well-trained one. The LAMBADA perplexity (94 vs 17 for GPT-2-124M) is the clearest signal: the base language modeling is not converged.
|
||||
|
||||
On **PIQA** (physical commonsense) the gap is smallest — that kind of knowledge appears to be learned faster than factual knowledge or precise language modeling.
|
||||
|
||||
## Chat Format
|
||||
|
||||
Model uses 3 text markers (no special tokens): `<|user|>`, `<|assistant|>`, `<|end|>`.
|
||||
|
||||
```
|
||||
<|user|>
|
||||
Your question
|
||||
<|assistant|>
|
||||
Model response
|
||||
<|end|>
|
||||
```
|
||||
|
||||
**Important**: `<|end|>` is NOT a single token (it tokenizes to 5 BPE tokens). Always pass it as a `stop_strings` parameter when generating, otherwise the model will run past its natural end-of-turn.
|
||||
|
||||
## Usage (Transformers)
|
||||
|
||||
```python
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
tok = AutoTokenizer.from_pretrained("YoAbriel/KodaLite-1.3B")
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
    "YoAbriel/KodaLite-1.3B", torch_dtype=torch.bfloat16, device_map="auto"
|
||||
)
|
||||
|
||||
msg = [{"role": "user", "content": "What is the capital of France?"}]
|
||||
prompt = tok.apply_chat_template(msg, tokenize=False, add_generation_prompt=False)
|
||||
inputs = tok(prompt, return_tensors="pt").to(model.device)
|
||||
|
||||
out = model.generate(
|
||||
**inputs, max_new_tokens=150, do_sample=True, temperature=0.7, top_k=40,
|
||||
stop_strings=["<|end|>"], tokenizer=tok,
|
||||
)
|
||||
print(tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=False))
|
||||
```
|
||||
|
||||
## Usage (MLX — Apple Silicon)
|
||||
|
||||
See [YoAbriel/KodaLite-1.3B-mlx](https://huggingface.co/YoAbriel/KodaLite-1.3B-mlx).
|
||||
|
||||
```python
|
||||
from mlx_lm import load, stream_generate
|
||||
model, tok = load("YoAbriel/KodaLite-1.3B-mlx-8bit")
|
||||
|
||||
def chat(q):
|
||||
prompt = tok.apply_chat_template([{"role": "user", "content": q}], tokenize=False)
|
||||
text = ""
|
||||
for resp in stream_generate(model, tok, prompt=prompt, max_tokens=150):
|
||||
text += resp.text
|
||||
if "<|end|>" in text:
|
||||
return text.split("<|end|>")[0]
|
||||
return text
|
||||
|
||||
print(chat("What is the capital of France?"))
|
||||
```
|
||||
|
||||
## Usage (llama.cpp / Ollama / LM Studio)
|
||||
|
||||
See [YoAbriel/KodaLite-1.3B-GGUF](https://huggingface.co/YoAbriel/KodaLite-1.3B-GGUF).
|
||||
|
||||
```bash
|
||||
ollama run hf.co/YoAbriel/KodaLite-1.3B-GGUF:Q4_K_M
|
||||
```
|
||||
|
||||
**LM Studio note**: the model was trained with `<|end|>` as a multi-token end marker. Since GGUF only supports single-token EOS, you need to **manually add `<|end|>` as a Stop String** in LM Studio's Advanced Settings.
|
||||
|
||||
## Architecture (LLaMA-compatible)
|
||||
|
||||
| Component | Value |
|
||||
|---|---|
|
||||
| Parameters | 1.27B |
|
||||
| Layers | 24 |
|
||||
| Hidden size | 2048 |
|
||||
| Attention | GQA (32Q / 8KV heads) |
|
||||
| Head dim | 64 |
|
||||
| FFN | SwiGLU, intermediate 5504 |
|
||||
| Normalization | RMSNorm (pre-norm) |
|
||||
| Position | RoPE (theta=10000) |
|
||||
| Context | 1024 tokens |
|
||||
| Vocab | 50,257 (GPT-2 BPE) |
|
||||
|
||||
## Training
|
||||
|
||||
### Pre-training
|
||||
- **Dataset**: SlimPajama-6B (streaming)
|
||||
- **Tokens seen**: 1.64B
|
||||
- **Hardware**: 2x NVIDIA L40S (96GB VRAM total)
|
||||
- **Precision**: bfloat16
|
||||
- **Framework**: JAX + Flax NNX (trained from scratch, no base model)
|
||||
|
||||
### SFT
|
||||
- **Datasets**: Databricks Dolly-15K + OpenAssistant OASST1
|
||||
- **Method**: LoRA (rank=16, alpha=32), then merged into base weights
|
||||
- **End-of-turn marker**: `<|end|>` (5 BPE tokens, NOT a special token)
|
||||
|
||||
## Limitations
|
||||
|
||||
- **Severely undertrained** (6.5% of Chinchilla-optimal) — factual accuracy is low
|
||||
- May produce repetitive or inaccurate responses
|
||||
- English only
|
||||
- 1024 context window
|
||||
- Educational / research project — not production-ready
|
||||
|
||||
## Lessons learned (for a potential v0.2)
|
||||
|
||||
1. **Train longer**: aim for 20B+ tokens (Chinchilla-optimal for 1.3B would be ~25B).
|
||||
2. **Use `<|endoftext|>` (single token) as end-of-turn marker** for native GGUF/LM Studio stop support.
|
||||
3. SwiGLU + RMSNorm + GQA + RoPE architecture is correct — no issues there, confirmed by the fact that our scaling follows the expected curve.
|
||||
|
||||
## License
|
||||
|
||||
Apache 2.0
|
||||
5
chat_template.jinja
Normal file
5
chat_template.jinja
Normal file
@@ -0,0 +1,5 @@
|
||||
{% for m in messages %}{% if m['role'] == 'user' %}<|user|>
|
||||
{{ m['content'] }}
|
||||
<|assistant|>
|
||||
{% elif m['role'] == 'assistant' %}{{ m['content'] }}
|
||||
<|end|>{% endif %}{% endfor %}
|
||||
25
config.json
Normal file
25
config.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"model_type": "llama",
|
||||
"vocab_size": 50257,
|
||||
"hidden_size": 2048,
|
||||
"intermediate_size": 5504,
|
||||
"num_hidden_layers": 24,
|
||||
"num_attention_heads": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"head_dim": 64,
|
||||
"hidden_act": "silu",
|
||||
"max_position_embeddings": 1024,
|
||||
"rope_theta": 10000.0,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"bos_token_id": 50256,
|
||||
"eos_token_id": 50256,
|
||||
"pad_token_id": 50256,
|
||||
"initializer_range": 0.02,
|
||||
"use_cache": true,
|
||||
"transformers_version": "4.46.0"
|
||||
}
|
||||
13
generation_config.json
Normal file
13
generation_config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"bos_token_id": 50256,
|
||||
"eos_token_id": 50256,
|
||||
"pad_token_id": 50256,
|
||||
"stop_strings": [
|
||||
"<|end|>"
|
||||
],
|
||||
"do_sample": true,
|
||||
"temperature": 0.7,
|
||||
"top_k": 40,
|
||||
"max_new_tokens": 150,
|
||||
"transformers_version": "4.46.0"
|
||||
}
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6f605100c599dad4b2559c38e7afe892d61947ae93b0b4ba9b1bd384a8b95d63
|
||||
size 2538443400
|
||||
250306
tokenizer.json
Normal file
250306
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
13
tokenizer_config.json
Normal file
13
tokenizer_config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": "<|endoftext|>",
|
||||
"eos_token": "<|endoftext|>",
|
||||
"errors": "replace",
|
||||
"is_local": false,
|
||||
"model_max_length": 1024,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"tokenizer_class": "GPT2Tokenizer",
|
||||
"unk_token": "<|endoftext|>",
|
||||
"chat_template": "{% for m in messages %}{% if m['role'] == 'user' %}<|user|>\n{{ m['content'] }}\n<|assistant|>\n{% elif m['role'] == 'assistant' %}{{ m['content'] }}\n<|end|>{% endif %}{% endfor %}"
|
||||
}
|
||||
Reference in New Issue
Block a user