初始化项目,由ModelHub XC社区提供模型

Model: Orion-zhen/Qwen2.5-7B-Gutenberg-KTO
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-30 03:21:20 +08:00
commit 3adea7528c
29 changed files with 455454 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

48
README.md Normal file
View File

@@ -0,0 +1,48 @@
---
license: gpl-3.0
datasets:
- Orion-zhen/kto-gutenberg
language:
- zh
- en
base_model:
- Orion-zhen/Qwen2.5-7B-Instruct-Uncensored
pipeline_tag: text-generation
---
# Qwen2.5-7B-Gutenberg-KTO
This model is fine-tuned on the Gutenberg dataset using the KTO strategy. This is my first time using KTO, and I'm not sure how the model actually performs.
Compared to large companies that remove accessories such as chargers and cables from their packages, I have achieved **real** environmental protection by **truly** reducing energy consumption, rather than shifting costs to consumers.
Check out the GGUF version here: [Orion-zhen/Qwen2.5-7B-Gutenberg-KTO-Q6_K-GGUF](https://huggingface.co/Orion-zhen/Qwen2.5-7B-Gutenberg-KTO-Q6_K-GGUF)
## Details
### Platform
~~I randomly grabbed some rubbish from a second-hand market and built a PC~~
I carefully selected various pieces of dedicated hardware and constructed an incomparable home server, which I named the **Great Server**:
- CPU: Intel Core i3-4160
- Memory: 8G DDR3, single channel
- GPU: Tesla P4, TDP 75W, boasting **eco-friendly energy consumption**
- Disk: 1TB M.2 NVMe, PCIe 4.0
### Training
To practice **eco-friendly training**, I used various methods, including Adam-mini, QLoRA and Unsloth, to minimize VRAM and energy usage as well as to accelerate training.
- dataset: [Orion-zhen/kto-gutenberg](https://huggingface.co/datasets/Orion-zhen/kto-gutenberg)
- epoch: 2
- gradient accumulation: 8
- batch size: 1
- KTO pref beta: 0.1
### Train log
![training_loss](./training_loss.png)
![training_eval_loss](./training_eval_loss.png)

24
added_tokens.json Normal file
View File

@@ -0,0 +1,24 @@
{
"</tool_call>": 151658,
"<tool_call>": 151657,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "/home/orion/ai/Models/qwen2.5-7b",
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.45.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}

14
generation_config.json Normal file
View File

@@ -0,0 +1,14 @@
{
"bos_token_id": 151643,
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"repetition_penalty": 1.05,
"temperature": 0.7,
"top_k": 20,
"top_p": 0.8,
"transformers_version": "4.45.2"
}

151388
merges.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7750ef811855400eaf0017804c4c12081ba254debbfcf4a3235384784c5a429
size 1089994896

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a7419815472cb85d05f2723eb95ad8aad916e2549c6cf653adb2af2cf8751773
size 990964136

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cf1641b1ce049be2460c0f67c0733542f8df6f445a5850448fcdce343cd74134
size 932233880

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:edb3d6ddb636ed8f040e0a5525b748ddb24c15d726f2093cb6abd9e1e90d6bb6
size 932233880

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5ea9f823b1bab751ca9d31b48763743e3f4dd8c2974e3e60ee410263cd5982c
size 932233880

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aea2eb8e4028d8ae7c145483f6f17bac6f9f98a9f0403a60b43d8ca6e0821a60
size 932233872

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:77f20f1747dec3eafb97db1e34cd8967461a21c5aa76fa934ece5c80a3d6cc4e
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0766cc585b7b5b78b1b780bf755bae2f2622417ee0b003030586b815e20a5e9
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b75f5326d12b18a490a98d688c49da0d19d1ff1d27680d8d1c5a47785cc71a11
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:209176ae1762be29a3e7ddf2596a7e0fd4362ae9ccbae811d4ff4dae35866cc3
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e417f9701f4875ea80ea02d1761c0f30a76be1f3b38afdc3118d2def8c16d2b
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c7284b5e252ca3c6c0b7ea9b4572f28ca23d9ff0df3747212f4e86f65627170c
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6398e524bfe50316c282770f0bfef7ece499a8193bd2aa0e463253c6382098d5
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:27365195d57d4325424741992b07269f8df93589ce038808f87ae2a0152c283f
size 932233904

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06006972c3be88e8a44fe21cfe2b0472b130780c781a741f8f90f1fe5ba3aae2
size 1089994880

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a077bf080912c640677d8a302387ceea083c29283ebbced2e98cec68b38cb5f7
size 873510904

View File

@@ -0,0 +1,346 @@
{
"metadata": {
"total_size": 15231233024
},
"weight_map": {
"lm_head.weight": "model-00015-of-00016.safetensors",
"model.embed_tokens.weight": "model-00001-of-00016.safetensors",
"model.layers.0.input_layernorm.weight": "model-00002-of-00016.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.k_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.q_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.v_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.input_layernorm.weight": "model-00002-of-00016.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.k_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.q_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.v_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.10.input_layernorm.weight": "model-00007-of-00016.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00007-of-00016.safetensors",
"model.layers.10.self_attn.k_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.q_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.v_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.11.input_layernorm.weight": "model-00007-of-00016.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.k_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.q_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.v_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.12.input_layernorm.weight": "model-00008-of-00016.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00008-of-00016.safetensors",
"model.layers.12.self_attn.k_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.q_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.v_proj.bias": "model-00007-of-00016.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00007-of-00016.safetensors",
"model.layers.13.input_layernorm.weight": "model-00008-of-00016.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.k_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.q_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.v_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.14.input_layernorm.weight": "model-00009-of-00016.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00009-of-00016.safetensors",
"model.layers.14.self_attn.k_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.q_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.v_proj.bias": "model-00008-of-00016.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00008-of-00016.safetensors",
"model.layers.15.input_layernorm.weight": "model-00009-of-00016.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.k_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.q_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.v_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.16.input_layernorm.weight": "model-00010-of-00016.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00010-of-00016.safetensors",
"model.layers.16.self_attn.k_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.q_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.v_proj.bias": "model-00009-of-00016.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00009-of-00016.safetensors",
"model.layers.17.input_layernorm.weight": "model-00010-of-00016.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.k_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.q_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.v_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.18.input_layernorm.weight": "model-00011-of-00016.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00011-of-00016.safetensors",
"model.layers.18.self_attn.k_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.q_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.v_proj.bias": "model-00010-of-00016.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00010-of-00016.safetensors",
"model.layers.19.input_layernorm.weight": "model-00011-of-00016.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.k_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.q_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.v_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.2.input_layernorm.weight": "model-00003-of-00016.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00003-of-00016.safetensors",
"model.layers.2.self_attn.k_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.q_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.v_proj.bias": "model-00002-of-00016.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00016.safetensors",
"model.layers.20.input_layernorm.weight": "model-00012-of-00016.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00012-of-00016.safetensors",
"model.layers.20.self_attn.k_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.q_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.v_proj.bias": "model-00011-of-00016.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00011-of-00016.safetensors",
"model.layers.21.input_layernorm.weight": "model-00012-of-00016.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.k_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.q_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.v_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.22.input_layernorm.weight": "model-00013-of-00016.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00013-of-00016.safetensors",
"model.layers.22.self_attn.k_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.q_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.v_proj.bias": "model-00012-of-00016.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00012-of-00016.safetensors",
"model.layers.23.input_layernorm.weight": "model-00013-of-00016.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.k_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.q_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.v_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.24.input_layernorm.weight": "model-00014-of-00016.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00014-of-00016.safetensors",
"model.layers.24.self_attn.k_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.q_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.v_proj.bias": "model-00013-of-00016.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00013-of-00016.safetensors",
"model.layers.25.input_layernorm.weight": "model-00014-of-00016.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.k_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.q_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.v_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.26.input_layernorm.weight": "model-00016-of-00016.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00016-of-00016.safetensors",
"model.layers.26.self_attn.k_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.q_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.v_proj.bias": "model-00014-of-00016.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00014-of-00016.safetensors",
"model.layers.27.input_layernorm.weight": "model-00016-of-00016.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.k_proj.bias": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.q_proj.bias": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.v_proj.bias": "model-00016-of-00016.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00016-of-00016.safetensors",
"model.layers.3.input_layernorm.weight": "model-00003-of-00016.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.k_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.q_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.v_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.4.input_layernorm.weight": "model-00004-of-00016.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00004-of-00016.safetensors",
"model.layers.4.self_attn.k_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.q_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.v_proj.bias": "model-00003-of-00016.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00016.safetensors",
"model.layers.5.input_layernorm.weight": "model-00004-of-00016.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.k_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.q_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.v_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.6.input_layernorm.weight": "model-00005-of-00016.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00005-of-00016.safetensors",
"model.layers.6.self_attn.k_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.q_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.v_proj.bias": "model-00004-of-00016.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00004-of-00016.safetensors",
"model.layers.7.input_layernorm.weight": "model-00005-of-00016.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.k_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.q_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.v_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.8.input_layernorm.weight": "model-00006-of-00016.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00006-of-00016.safetensors",
"model.layers.8.self_attn.k_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.q_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.v_proj.bias": "model-00005-of-00016.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00005-of-00016.safetensors",
"model.layers.9.input_layernorm.weight": "model-00006-of-00016.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.k_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.q_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.v_proj.bias": "model-00006-of-00016.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00006-of-00016.safetensors",
"model.norm.weight": "model-00016-of-00016.safetensors"
}
}

31
special_tokens_map.json Normal file
View File

@@ -0,0 +1,31 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

303283
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

207
tokenizer_config.json Normal file
View File

@@ -0,0 +1,207 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

BIN
training_eval_loss.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

BIN
training_loss.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

1
vocab.json Normal file

File diff suppressed because one or more lines are too long