初始化项目,由ModelHub XC社区提供模型
Model: amitbehura/philosophy-oracle-smollm2-360m Source: Original Platform
This commit is contained in:
37
.gitattributes
vendored
Normal file
37
.gitattributes
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
SmolLM2-360M-Instruct.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
philosophy-oracle-smollm2-360m-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
119
README.md
Normal file
119
README.md
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
---
|
||||||
|
language: en
|
||||||
|
license: apache-2.0
|
||||||
|
base_model: HuggingFaceTB/SmolLM2-360M-Instruct
|
||||||
|
tags:
|
||||||
|
- philosophy
|
||||||
|
- fine-tuned
|
||||||
|
- qlora
|
||||||
|
- smollm2
|
||||||
|
- literature
|
||||||
|
- text-generation
|
||||||
|
- llama
|
||||||
|
- conversational
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
---
|
||||||
|
|
||||||
|
# Philosophy Oracle — SmolLM2 360M
|
||||||
|
|
||||||
|
A fine-tuned version of SmolLM2-360M-Instruct, trained on 2,000+ synthetic Q&A pairs derived from 34 philosophy and literature classics. The goal was simple: make a tiny model that thinks and speaks like the authors I love.
|
||||||
|
|
||||||
|
## Model Details
|
||||||
|
|
||||||
|
| Property | Value |
|
||||||
|
|---|---|
|
||||||
|
| Base Model | HuggingFaceTB/SmolLM2-360M-Instruct |
|
||||||
|
| Fine-tuning Method | QLoRA (4-bit) |
|
||||||
|
| Training Epochs | 3 |
|
||||||
|
| Training Pairs | ~2,000 |
|
||||||
|
| Merged Size | 694 MB |
|
||||||
|
| GGUF (Q8_0) Size | 386 MB |
|
||||||
|
| Framework | Unsloth Studio |
|
||||||
|
|
||||||
|
## Training Corpus
|
||||||
|
|
||||||
|
34 books across philosophy, existentialism, psychoanalysis, and literature. Pair counts are weighted by philosophical depth:
|
||||||
|
|
||||||
|
| Book | Pairs |
|
||||||
|
|---|---|
|
||||||
|
| Thus Spoke Zarathustra | 150 |
|
||||||
|
| Notes from Underground | 120 |
|
||||||
|
| The Myth of Sisyphus | 121 |
|
||||||
|
| Civilization and Its Discontents | 100 |
|
||||||
|
| The Possessed | 100 |
|
||||||
|
| Upanishads | 112 |
|
||||||
|
| The Fall | 70 |
|
||||||
|
| Totem and Taboo | 70 |
|
||||||
|
| The Interpretation of Dreams | 70 |
|
||||||
|
| Moby-Dick | 70 |
|
||||||
|
| The Symposium | 80 |
|
||||||
|
| Rig Veda | 80 |
|
||||||
|
| Book of Chuang Tzu | 80 |
|
||||||
|
| Tao Te Ching | 100 |
|
||||||
|
| Gilgamesh | 70 |
|
||||||
|
| The Social Contract | 60 |
|
||||||
|
| The Three Theban Plays | 50 |
|
||||||
|
| The Trial | 50 |
|
||||||
|
| The Death of Ivan Ilyich | 50 |
|
||||||
|
| The Oresteia | 50 |
|
||||||
|
| The Need for Roots | 50 |
|
||||||
|
| Four Archetypes | 50 |
|
||||||
|
| A Room of One's Own | 40 |
|
||||||
|
| Oppression and Liberty | 40 |
|
||||||
|
| Fathers and Sons | 40 |
|
||||||
|
| The Metamorphosis | 40 |
|
||||||
|
| War and Peace | 40 |
|
||||||
|
| The Bell Jar | 30 |
|
||||||
|
| To the Lighthouse | 30 |
|
||||||
|
| Wuthering Heights | 30 |
|
||||||
|
| The Picture of Dorian Gray | 30 |
|
||||||
|
| One Day in the Life of Ivan Denisovich | 30 |
|
||||||
|
| I Who Have Never Known Men | 30 |
|
||||||
|
| The Joke | 30 |
|
||||||
|
|
||||||
|
## Intended Use
|
||||||
|
|
||||||
|
- Philosophical Q&A with opinionated, crisp responses
|
||||||
|
- Edge deployment via Ollama or LM Studio using the GGUF
|
||||||
|
- Exploring what personality-driven fine-tuning looks like at 360M parameters
|
||||||
|
|
||||||
|
## How to Use
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
|
model_id = "amitbehura/philosophy-oracle-smollm2-360m"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "How should one live in the face of the certainty of death?"}
|
||||||
|
]
|
||||||
|
|
||||||
|
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
|
||||||
|
output = model.generate(input_ids, max_new_tokens=200)
|
||||||
|
print(tokenizer.decode(output[0]))
|
||||||
|
```
|
||||||
|
|
||||||
|
### GGUF (Local Use)
|
||||||
|
|
||||||
|
Use `philosophy-oracle-smollm2-360m-Q8_0.gguf` with Ollama or LM Studio directly.
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- 360M parameter model — reasoning depth is limited by size
|
||||||
|
- Occasionally invents philosophical terminology to maintain persona
|
||||||
|
- Not suitable for factual Q&A — designed for philosophical voice, not accuracy
|
||||||
|
- Responses may reflect translator style rather than original author intent
|
||||||
|
|
||||||
|
## Sample Output
|
||||||
|
|
||||||
|
**Q: How should one live in the face of the certainty of death?**
|
||||||
|
|
||||||
|
> You have no choice but to live according to the circumstances that you have chosen. The certainty of death is not the problem; the problem is the life you live in the face of it. Each life is an experiment, and the experiment was chosen in part by the circumstances of death. Live according to the circumstances of your experiment, and then live.
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
Built by [Amit Behura](https://amitbehura.github.io) as a personal experiment in personality-driven fine-tuning on a single laptop GPU (RTX 2050, 4GB VRAM).
|
||||||
|
|
||||||
|
A full writeup on the methodology, training decisions, and results is available on [Substack](https://amitbehura.substack.com/p/fine-tuning-own-philosophy-small).
|
||||||
3
added_tokens.json
Normal file
3
added_tokens.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"<|PAD_TOKEN|>": 49152
|
||||||
|
}
|
||||||
6
chat_template.jinja
Normal file
6
chat_template.jinja
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
|
||||||
|
You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
|
||||||
|
' }}{% endif %}{{'<|im_start|>' + message['role'] + '
|
||||||
|
' + message['content'] + '<|im_end|>' + '
|
||||||
|
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
|
||||||
|
' }}{% endif %}
|
||||||
38
config.json
Normal file
38
config.json
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"head_dim": 64,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 960,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 2560,
|
||||||
|
"is_llama_config": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 15,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 5,
|
||||||
|
"pad_token_id": 49152,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_interleaved": false,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 100000,
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers.js_config": {
|
||||||
|
"kv_cache_dtype": {
|
||||||
|
"fp16": "float16",
|
||||||
|
"q4f16": "float16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"unsloth_version": "2026.4.6",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 49153
|
||||||
|
}
|
||||||
3
export_metadata.json
Normal file
3
export_metadata.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"base_model": "unsloth/SmolLM2-360M-Instruct"
|
||||||
|
}
|
||||||
48901
merges.txt
Normal file
48901
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:18a280a19b3e27e01d8d2efd44efd74c0dc4651d8e4d4b8bfef4d51a88daca74
|
||||||
|
size 723676832
|
||||||
3
philosophy-oracle-smollm2-360m-Q8_0.gguf
Normal file
3
philosophy-oracle-smollm2-360m-Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c28a723dbe1506fef07255eafdf843486610ef50e1f9c47e04b60446d02fdbc8
|
||||||
|
size 386405984
|
||||||
34
special_tokens_map.json
Normal file
34
special_tokens_map.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>"
|
||||||
|
],
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<|PAD_TOKEN|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "�",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
244967
tokenizer.json
Normal file
244967
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
176
tokenizer_config.json
Normal file
176
tokenizer_config.json
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"content": "<repo_name>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"content": "<reponame>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"content": "<file_sep>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"6": {
|
||||||
|
"content": "<filename>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"7": {
|
||||||
|
"content": "<gh_stars>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"content": "<issue_start>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"content": "<issue_comment>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"10": {
|
||||||
|
"content": "<issue_closed>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"11": {
|
||||||
|
"content": "<jupyter_start>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"12": {
|
||||||
|
"content": "<jupyter_text>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"13": {
|
||||||
|
"content": "<jupyter_code>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"14": {
|
||||||
|
"content": "<jupyter_output>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"15": {
|
||||||
|
"content": "<jupyter_script>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"16": {
|
||||||
|
"content": "<empty_output>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"24211": {
|
||||||
|
"content": "�",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"49152": {
|
||||||
|
"content": "<|PAD_TOKEN|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>"
|
||||||
|
],
|
||||||
|
"bos_token": "<|im_start|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|im_end|>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"max_length": 2048,
|
||||||
|
"model_max_length": 8192,
|
||||||
|
"pad_token": "<|PAD_TOKEN|>",
|
||||||
|
"padding_side": "right",
|
||||||
|
"stride": 0,
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"truncation_side": "right",
|
||||||
|
"truncation_strategy": "longest_first",
|
||||||
|
"unk_token": "�",
|
||||||
|
"vocab_size": 49152,
|
||||||
|
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
|
||||||
|
}
|
||||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user