初始化项目,由ModelHub XC社区提供模型

Model: cloudyu/mistral_11B_instruct_v0.1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-06 07:32:36 +08:00
commit 1f12d9e683
17 changed files with 91779 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

31
README.md Normal file
View File

@@ -0,0 +1,31 @@
---
license: cc-by-4.0
---
```
this is a 48-layer model based on the mistral architecture
SFT (supervised fine-tuned) on vicgalle/alpaca-gpt4.
template is "{instruction} {inputs} \n {output}"
```
example output
```
<s> write me a story about yosemite.
Here is a short story about the beauty and wonder of Yosemite National Park:
As the sun rose over the mountains, its warm golden rays illuminated the once- shadowed walls of the grandeur that was Yosemite. The early morning hush was broken by the sound of birds singing and the gentle rustling of leaves as the wind blowing through the trees.
In the heart of this natural wonder, there lived a young girl named Sarah. She had grown up in the village at the edge of the park, and her parents were some of the few who had made their homes there. They were farmers, but their real love was for the beauty of the land around them.
One day, while exploring the woods, Sarah stumbled upon a hidden clearing. It was filled with wildflowers, and in the center stood an old oak tree, its branches stretching out to provide shelter to many bird species. As she sat down under the tree, taking in the beauty of her surroundings, she heard a soft, melodious call.
It was a baby deer, and it had fallen into a narrow stream. Its mother was frantic with worry, calling out to her child from the shore. Sarah quickly ran to help, and with the help of a group of travelers passing through the park, they managed to save the baby.
From then on, Sarah would often visit the clearing, and soon, the animals began to trust her. She would sit and listen to the birds chirp, watch the butterflies flutter by, and even play with the curious foxes.
Years went by, and Sarah grew up to become a renowned conservationist, dedicated to preserving the beauty of nature. And so, thanks to her efforts, generations of people have been able to explore and appreciate the wonder of Yosemite.
The sun set over the mountains, casting long shadows across the landscape. As Sarah walked back to her village, she knew that she would never forget the memories she had made within those mountains, and that the magic of Yosemite would always remain with her.
And so, the cycle of life continued, with each day bringing new adventures and discoveries within the vast expanse of the beautiful and timeless Yosemite National Park.
```

26
config.json Normal file
View File

@@ -0,0 +1,26 @@
{
"_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 48,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.38.2",
"use_cache": true,
"vocab_size": 32000
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.38.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5d9e891e9ddf087b29ad5127b8ab20beeb8e94ab60209f0f4a2b81a10ca0f24
size 4987196936

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:95c760fff7199f7947f1ba02301242a3121e9d292f97696ec77b7d3d2c4fb9c7
size 4899116440

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5fa28bdda4140e796e982032dfc97a1f837bd29c5f709992cd93f8011a3f5919
size 4999813120

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c6d79ef25690bc3ae188a538eff771ee7bafffef96725c586e15661e5cc4b5a7
size 4999813128

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44fbad91905bdad941555683babe844a3c61fe2c26b3679a3135c1a472ab622e
size 4832007496

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3535a19a8582c41c9b80577b4e7d2184399aaca3744b6e820ecc858a98b35b54
size 4999813120

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c686ebee70601bc5c0a8a70233aa183ab378cf1bbdc0cf38a2f8c82d9795deb7
size 4999813128

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:364cad87b9505886341ae52969e35506019ccbc4ff6fed80d2bf92fddaf6cd4c
size 4832007496

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b05b1559c95462f9c720939d3e02921820dec7dff66f34d05857562720db5330
size 3376565856

View File

@@ -0,0 +1,442 @@
{
"metadata": {
"total_size": 42926096384
},
"weight_map": {
"lm_head.weight": "model-00009-of-00009.safetensors",
"model.embed_tokens.weight": "model-00001-of-00009.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.10.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.11.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.input_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.16.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00009.safetensors",
"model.layers.17.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.20.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.input_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.22.input_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00004-of-00009.safetensors",
"model.layers.23.input_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.input_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.input_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.input_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00005-of-00009.safetensors",
"model.layers.28.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.30.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.input_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.33.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00006-of-00009.safetensors",
"model.layers.34.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.input_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.39.input_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00007-of-00009.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00009.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.40.input_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.40.mlp.down_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.40.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.self_attn.o_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.self_attn.q_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.40.self_attn.v_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.input_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.41.mlp.down_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.41.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.self_attn.o_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.self_attn.q_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.41.self_attn.v_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.input_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.42.mlp.down_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.42.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.self_attn.o_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.self_attn.q_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.42.self_attn.v_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.input_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.43.mlp.down_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
"model.layers.43.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.self_attn.o_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.self_attn.q_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.43.self_attn.v_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.input_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.44.mlp.down_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.44.mlp.gate_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.post_attention_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.44.self_attn.k_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.self_attn.o_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.self_attn.q_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.44.self_attn.v_proj.weight": "model-00008-of-00009.safetensors",
"model.layers.45.input_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.45.mlp.down_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.mlp.gate_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.mlp.up_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.post_attention_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.45.self_attn.k_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.self_attn.o_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.self_attn.q_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.45.self_attn.v_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.input_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.46.mlp.down_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.mlp.gate_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.mlp.up_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.post_attention_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.46.self_attn.k_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.self_attn.o_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.self_attn.q_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.46.self_attn.v_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.input_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.47.mlp.down_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.mlp.gate_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.mlp.up_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.post_attention_layernorm.weight": "model-00009-of-00009.safetensors",
"model.layers.47.self_attn.k_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.self_attn.o_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.self_attn.q_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.47.self_attn.v_proj.weight": "model-00009-of-00009.safetensors",
"model.layers.5.input_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00009.safetensors",
"model.layers.6.input_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.input_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.input_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00009.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00009.safetensors",
"model.norm.weight": "model-00009-of-00009.safetensors"
}
}

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91131
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

51
tokenizer_config.json Normal file
View File

@@ -0,0 +1,51 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}