初始化项目,由ModelHub XC社区提供模型

Model: Hugofernandez/Mistral-7B-v0.1-colab-sharded
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-24 03:49:17 +08:00
commit 97165c43a5
14 changed files with 91620 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

52
README.md Normal file
View File

@@ -0,0 +1,52 @@
---
license: apache-2.0
pipeline_tag: text-generation
language:
- en
tags:
- pretrained
inference:
  parameters:
    temperature: 0.7
---
# Model Card for Mistral-7B-v0.1-colab-sharded
This model is a re-sharded version of Mistral-7B-v0.1 with more shards (6 instead of 2), which makes it easier to load on machines that don't have a lot of RAM, such as the free version of Google Colab.
The original model can be found here: https://huggingface.co/mistralai/Mistral-7B-v0.1
## Model Description
The Mistral-7B-v0.1 Large Language Model (LLM) is a pretrained generative text model with 7 billion parameters.
Mistral-7B-v0.1 outperforms Llama 2 13B on all benchmarks we tested.
For full details of this model please read the [paper](https://arxiv.org/abs/2310.06825) and [release blog post](https://mistral.ai/news/announcing-mistral-7b/).
## Model Architecture
Mistral-7B-v0.1 is a transformer model, with the following architecture choices:
- Grouped-Query Attention
- Sliding-Window Attention
- Byte-fallback BPE tokenizer
## Troubleshooting
- If you see the following error:
```
KeyError: 'mistral'
```
- Or:
```
NotImplementedError: Cannot copy out of meta tensor; no data!
```
Ensure you are utilizing a stable version of Transformers, 4.34.0 or newer.
## Notice
Mistral 7B is a pretrained base model and therefore does not have any moderation mechanisms.
## The Mistral AI Team
Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.

25
config.json Normal file
View File

@@ -0,0 +1,25 @@
{
"_name_or_path": "mistralai/Mistral-7B-v0.1",
"architectures": [
"MistralForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.35.2",
"use_cache": true,
"vocab_size": 32000
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.35.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f3b1dea8ba007c65ffca0ac7b69376822ce920e648970f434cbdbd72653666db
size 4987208236

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dfa5f19fa158f854e7f0b3545feb6b31f6b18fa5b20fe1bcb7ce1ace54edaf35
size 4899127538

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:34924d934b52eacefc758aa358c05aa5a2eab78e8062e10b91f543eb471b2a0f
size 4999825320

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7e1be4a3852a12ed579befc65454a2f7b9184babd6e46a33c7b0ac56c6b0f703
size 4999825316

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d9c60f89ff19f645baeeaaa392b8427c2c92c137e5c6957be47ed0b0536f280
size 4832018324

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f542a936924955cdf1cf00287360ea7499a2cf3ad5aa16cc7bf3854bd808fd3d
size 4249024332

View File

@@ -0,0 +1,298 @@
{
"metadata": {
"total_size": 28966928384
},
"weight_map": {
"lm_head.weight": "pytorch_model-00006-of-00006.bin",
"model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.10.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.11.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.input_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.16.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00003-of-00006.bin",
"model.layers.17.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.20.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.input_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.22.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00004-of-00006.bin",
"model.layers.23.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.input_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00005-of-00006.bin",
"model.layers.28.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.30.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.input_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00006-of-00006.bin",
"model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.5.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00006.bin",
"model.layers.6.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
"model.norm.weight": "pytorch_model-00006-of-00006.bin"
}
}

24
special_tokens_map.json Normal file
View File

@@ -0,0 +1,24 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": "<unk>",
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91122
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

40
tokenizer_config.json Normal file
View File

@@ -0,0 +1,40 @@
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<unk>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": true
}