初始化项目,由ModelHub XC社区提供模型

Model: SanjiWatsuki/Kunoichi-DPO-v2-7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-17 23:49:51 +08:00
commit 3e06cac121
12 changed files with 91624 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

62
README.md Normal file
View File

@@ -0,0 +1,62 @@
---
license: cc-by-nc-4.0
---

| Model | MT Bench | EQ Bench | MMLU | Logic Test |
|----------------------|----------|----------|---------|-------------|
| GPT-4-Turbo | 9.32 | - | - | - |
| GPT-4 | 8.99 | 62.52 | 86.4 | 0.86 |
| **Kunoichi-DPO-v2-7B** | **8.51** | **42.18** | **64.94**| **0.58** |
| Mixtral-8x7B-Instruct| 8.30 | 44.81 | 70.6 | 0.75 |
| **Kunoichi-DPO-7B** | **8.29** | **41.60** | **64.83** | **0.59** |
| **Kunoichi-7B** | **8.14** | **44.32** | **64.9** | **0.58** |
| Starling-7B | 8.09 | - | 63.9 | 0.51 |
| Claude-2 | 8.06 | 52.14 | 78.5 | - |
| Silicon-Maid-7B | 7.96 | 40.44 | 64.7 | 0.54 |
| Loyal-Macaroni-Maid-7B | 7.95 | 38.66 | 64.9 | 0.57 |
| GPT-3.5-Turbo | 7.94 | 50.28 | 70 | 0.57 |
| Claude-1 | 7.9 | - | 77 | - |
| Openchat-3.5 | 7.81 | 37.08 | 64.3 | 0.39 |
| Dolphin-2.6-DPO | 7.74 | 42.88 | 61.9 | 0.53 |
| Zephyr-7B-beta | 7.34 | 38.71 | 61.4 | 0.30 |
| Llama-2-70b-chat-hf | 6.86 | 51.56 | 63 | - |
| Neural-chat-7b-v3-1 | 6.84 | 43.61 | 62.4 | 0.30 |

| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |
|---|---:|---:|---:|---:|---:|
| **Kunoichi-DPO-7B**|**58.4**| 45.08 | 74| 66.99| 47.52|
| **Kunoichi-DPO-v2-7B**|**58.31**| 44.85| 75.05| 65.69| 47.65|
| [Kunoichi-7B](https://huggingface.co/SanjiWatsuki/Kunoichi-7B)|57.54| 44.99| 74.86| 63.72| 46.58|
| [OpenPipe/mistral-ft-optimized-1218](https://huggingface.co/OpenPipe/mistral-ft-optimized-1218)| 56.85 | 44.74 | 75.6 | 59.89 | 47.17 |
| [Silicon-Maid-7B](https://huggingface.co/SanjiWatsuki/Silicon-Maid-7B) | 56.45| 44.74| 74.26| 61.5| 45.32|
| [mlabonne/NeuralHermes-2.5-Mistral-7B](https://huggingface.co/mlabonne/NeuralHermes-2.5-Mistral-7B) | 53.51 | 43.67 | 73.24 | 55.37 | 41.76 |
| [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) | 52.42 | 42.75 | 72.99 | 52.99 | 40.94 |
| [openchat/openchat_3.5](https://huggingface.co/openchat/openchat_3.5) | 51.34 | 42.67 | 72.92 | 47.27 | 42.51 |
| [berkeley-nest/Starling-LM-7B-alpha](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) | 51.16 | 42.06 | 72.72 | 47.33 | 42.53 |
| [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) | 50.99 | 37.33 | 71.83 | 55.1 | 39.7 |

| Model | AlpacaEval2 | Length |
| --------------------------- | ----------- | ------ |
| GPT-4 | 23.58% | 1365 |
| GPT-4 0314 | 22.07% | 1371 |
| Mistral Medium | 21.86% | 1500 |
| Mixtral 8x7B v0.1 | 18.26% | 1465 |
| **Kunoichi-DPO-v2** | **17.19%** | 1785 |
| Claude 2 | 17.19% | 1069 |
| Claude | 16.99% | 1082 |
| Gemini Pro | 16.85% | 1315 |
| GPT-4 0613 | 15.76% | 1140 |
| Claude 2.1 | 15.73% | 1096 |
| Mistral 7B v0.2 | 14.72% | 1676 |
| GPT 3.5 Turbo 0613 | 14.13% | 1328 |
| LLaMA2 Chat 70B | 13.87% | 1790 |
| LMCocktail-10.7B-v1 | 13.15% | 1203 |
| WizardLM 13B V1.1 | 11.23% | 1525 |
| Zephyr 7B Beta | 10.99% | 1444 |
| OpenHermes-2.5-Mistral (7B) | 10.34% | 1107 |
| GPT 3.5 Turbo 0301 | 9.62% | 827 |
| **Kunoichi-7B** | **9.38%** | 1492 |
| GPT 3.5 Turbo 1106 | 9.18% | 796 |
| GPT-3.5 | 8.56% | 1018 |
| Phi-2 DPO | 7.76% | 1687 |
| LLaMA2 Chat 13B | 7.70% | 1513 |

26
config.json Normal file
View File

@@ -0,0 +1,26 @@
{
"_name_or_path": "SanjiWatsuki/Kunoichi-7B",
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 8192,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.35.2",
"use_cache": true,
"vocab_size": 32000
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.35.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f616568516897c713b8f22ad5859977d0f0d84426c6aad2b20994d023778c5d
size 4943162240

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0fa86e8b85d5a68cec58fdef7a588819354cabda44540c74061142d7faeb9eec
size 4999819232

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:38b0f51b3cd04e89d0aefd87681cbe19eef2de7f2b650ce82f3d64031490de4a
size 4540516256

View File

@@ -0,0 +1,298 @@
{
"metadata": {
"total_size": 14483464192
},
"weight_map": {
"lm_head.weight": "model-00003-of-00003.safetensors",
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
"model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
"model.norm.weight": "model-00003-of-00003.safetensors"
}
}

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91122
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

BIN
tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

40
tokenizer_config.json Normal file
View File

@@ -0,0 +1,40 @@
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": null,
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}