初始化项目,由ModelHub XC社区提供模型

Model: liuda1/Mistral-7B-golden
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-07 23:57:52 +08:00
commit df38b66c27
24 changed files with 91603 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

4
README.md Normal file
View File

@@ -0,0 +1,4 @@
---
license: unknown
---
Our model uses Mistral-7B-v0.1 as its base model. It was fine-tuned on an English chat dataset and then given further reinforcement training on specific datasets. The trained model has a certain level of chat ability, which we found to be enhanced in our own testing. We will continue training the model in the future to improve its Chinese chat ability.

25
config.json Normal file
View File

@@ -0,0 +1,25 @@
{
"_name_or_path": "/data1/ljf/data/Mistral-7B-v0.1",
"architectures": [
"MistralForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.34.1",
"use_cache": true,
"vocab_size": 32000
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.34.1"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:51c908ee8ffc99099f426010b6d5fcd62008b31708baffd49a877585f42b2790
size 899699869

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7116eb06e4d246a34126120c31fdf3c00470598b5f90081e2cc8a1596f914bda
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b86c7b84bfcda7268bc7e56efdf698e5878422be7acefff44bc60135d2d6359
size 989912073

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1666118bf7723f41456af64f14c8fcc5a90baa1ca9a967ea1c4da766110fa49c
size 956341595

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:46d245903e47f06d13dc280e067abf745819cc9ddb7448634da077656cba2726
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9a28ed9e56299778708f4926a8b92ae0829a002488cd6da2858f44ac4684ca7
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6966ff4fc1c2d0a2a2fd6237e664978b33d7685835ae1f1a59a4f64c8743e3c6
size 989912073

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1dba7e9ef375b8ed7f7b86039abea276f67c9a69da657a1ead60fe8a13c87658
size 956341595

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:215161030893b272d00dd28b1a27255110fb12dcbf8a60b4d8a864f1c59d74ed
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ece03a7635c3491be0142ad7c88b31c12ff6bb34c1ac1addacd02bb8deb3a3d
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:65bb7bee43f39293452950c33ad6f1e799eed7eec3103571c1422a915353d19c
size 989912073

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d830765a934b3048dbadc0d02e202ec42b19c3099ed7db1691f6983d46762e13
size 956341595

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:920f312e9765750a5ee95eb6684a123b85375f9d0ede08cf76f67c333f1538a9
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e09c18ad65426cfbd0c6c25606d3468ea93a05683d2e3745adf89287c59ee687
size 989895077

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08cf651e0695acbaa866f94693714f0ecd5e9777164bf174bd1b67a3dc5d413c
size 1340126027

View File

@@ -0,0 +1,298 @@
{
"metadata": {
"total_size": 14483464192
},
"weight_map": {
"lm_head.weight": "pytorch_model-00015-of-00015.bin",
"model.embed_tokens.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00015.bin",
"model.layers.10.input_layernorm.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.11.input_layernorm.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.input_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00006-of-00015.bin",
"model.layers.13.input_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.input_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00007-of-00015.bin",
"model.layers.15.input_layernorm.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.input_layernorm.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.17.input_layernorm.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00008-of-00015.bin",
"model.layers.18.input_layernorm.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.19.input_layernorm.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00009-of-00015.bin",
"model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.20.input_layernorm.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.input_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00010-of-00015.bin",
"model.layers.22.input_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.input_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00011-of-00015.bin",
"model.layers.24.input_layernorm.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.input_layernorm.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.26.input_layernorm.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00012-of-00015.bin",
"model.layers.27.input_layernorm.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.28.input_layernorm.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00013-of-00015.bin",
"model.layers.29.input_layernorm.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.3.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00015.bin",
"model.layers.30.input_layernorm.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00014-of-00015.bin",
"model.layers.31.input_layernorm.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00015-of-00015.bin",
"model.layers.4.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00003-of-00015.bin",
"model.layers.6.input_layernorm.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.input_layernorm.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.8.input_layernorm.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
"model.layers.9.input_layernorm.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00005-of-00015.bin",
"model.norm.weight": "pytorch_model-00015-of-00015.bin"
}
}

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91122
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

BIN
tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

42
tokenizer_config.json Normal file
View File

@@ -0,0 +1,42 @@
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": null,
"padding_side": "left",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"split_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": true
}