初始化项目,由ModelHub XC社区提供模型
Model: maywell/EEVE-Korean-10.8B-v1.0-16k Source: Original Platform
This commit is contained in:
52
.gitattributes
vendored
Normal file
52
.gitattributes
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
pytorch_model-00002-of-00003.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00001-of-00003.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00003-of-00003.bin filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
31
.ipynb_checkpoints/config-checkpoint.json
Normal file
31
.ipynb_checkpoints/config-checkpoint.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"_name_or_path": "yanolja/EEVE-Korean-10.8B-v1.0",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 32000,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 4096,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 48,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": {
|
||||
"factor": 4.0,
|
||||
"type": "linear"
|
||||
},
|
||||
"rope_theta": 10000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.34.0",
|
||||
"use_cache": false,
|
||||
"vocab_size": 40960
|
||||
}
|
||||
98
README.md
Normal file
98
README.md
Normal file
@@ -0,0 +1,98 @@
|
||||
---
|
||||
license: apache-2.0
|
||||
base_model: yanolja/EEVE-Korean-10.8B-v1.0
|
||||
---
|
||||
[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
|
||||
|
||||
<p align="left">
|
||||
<img src="https://huggingface.co/yanolja/EEVE-Korean-10.8B-v1.0/resolve/main/eeve_logo.webp" width="50%"/>
|
||||
</p>
|
||||
|
||||
# EEVE-Korean-10.8B-v1.0
|
||||
|
||||
## Join Our Community on Discord!
|
||||
|
||||
If you're passionate about the field of Large Language Models and wish to exchange knowledge and insights, we warmly invite you to join our Discord server. It's worth noting that Korean is the primary language used in this server. The landscape of LLMs is evolving rapidly, and without active sharing, our collective knowledge risks becoming outdated swiftly. Let's collaborate and drive greater impact together! Join us here: [Discord Link](https://discord.gg/b27bAHg95m).
|
||||
|
||||
## Our Dedicated Team (Alphabetical Order)
|
||||
| Research | Engineering | Product Management | UX Design |
|
||||
|-----------------|-----------------|--------------------|--------------|
|
||||
| Myeongho Jeong | Geon Kim | Bokyung Huh | Eunsue Choi |
|
||||
| Seungduk Kim | Rifqi Alfi | | |
|
||||
| Seungtaek Choi | Sanghoon Han | | |
|
||||
| | Suhyun Kang | | |
|
||||
|
||||
## About the Model
|
||||
|
||||
This model is a Korean vocabulary-extended version of [upstage/SOLAR-10.7B-v1.0](https://huggingface.co/upstage/SOLAR-10.7B-v1.0), specifically fine-tuned on various Korean web-crawled datasets available on HuggingFace. Our approach was to expand the model's understanding of Korean by pre-training the embeddings for new tokens and partially fine-tuning the `lm_head` embeddings for the already existing tokens while preserving the original parameters of the base model.
|
||||
|
||||
### Technical Deep Dive
|
||||
<p align="left">
|
||||
<img src="https://huggingface.co/yanolja/EEVE-Korean-10.8B-v1.0/resolve/main/EEVE_figure.png" width="100%"/>
|
||||
</p>
|
||||
|
||||
To adapt foundational models from English to Korean, we use subword-based embedding with a seven-stage training process involving parameter freezing.
|
||||
This approach progressively trains from input embeddings to full parameters, efficiently extending the model's vocabulary to include Korean.
|
||||
Our method enhances the model's cross-linguistic applicability by carefully integrating new linguistic tokens, focusing on causal language modeling pre-training.
|
||||
We leverage the inherent capabilities of foundational models trained on English to efficiently transfer knowledge and reasoning to Korean, optimizing the adaptation process.
|
||||
|
||||
For more details, please refer to our technical report: [Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models](https://arxiv.org/abs/2402.14714).
|
||||
|
||||
Here’s a simplified code snippet for our key approach:
|
||||
|
||||
```python
|
||||
# number_of_old_tokens is the size of tokenizer before vocab extension. For example, in case of EEVE-Korean-10.8B-v1.0, number_of_old_tokens is 32000.
|
||||
def freeze_partial_embedding_hook(grad):
|
||||
grad[:number_of_old_tokens] = 0
|
||||
return grad
|
||||
|
||||
for name, param in model.named_parameters():
|
||||
if ("lm_head" in name or "embed_tokens" in name) and "original" not in name:
|
||||
param.requires_grad = True
|
||||
if "embed_tokens" in name:
|
||||
param.register_hook(freeze_partial_embedding_hook)
|
||||
else:
|
||||
param.requires_grad = False
|
||||
```
|
||||
|
||||
### Usage and Limitations
|
||||
|
||||
Keep in mind that this model hasn't been fine-tuned with instruction-based training. While it excels in Korean language tasks, we advise careful consideration and further training for specific applications.
|
||||
|
||||
### Training Details
|
||||
|
||||
Our model’s training was comprehensive and diverse:
|
||||
|
||||
- **Vocabulary Expansion:**
|
||||
We meticulously selected 8,960 Korean tokens based on their frequency in our Korean web corpus. This process involved multiple rounds of tokenizer training, manual curation, and token frequency analysis, ensuring a rich and relevant vocabulary for our model.
|
||||
|
||||
1. **Initial Tokenizer Training:** We trained an intermediate tokenizer on a Korean web corpus, with a vocabulary of 40,000 tokens.
|
||||
|
||||
2. **Extraction of New Korean Tokens:** From the intermediate tokenizer, we identified all Korean tokens not present in the original SOLAR tokenizer.
|
||||
|
||||
3. **Manual Tokenizer Construction:** We then built the target tokenizer, focusing on these new Korean tokens.
|
||||
|
||||
4. **Frequency Analysis:** Using the target tokenizer, we processed a 100GB Korean corpus to count each token's frequency.
|
||||
|
||||
5. **Refinement of Token List:** We removed tokens appearing fewer than 6,000 times, ensuring that we secured enough tokens to train models later.
|
||||
|
||||
6. **Inclusion of Single-Letter Characters:** We counted the missing Korean single-letter characters and added those that appeared more than 6,000 times to the target tokenizer.
|
||||
|
||||
7. **Iterative Refinement:** We repeated steps 2 to 6 until there were no tokens to drop or add.
|
||||
|
||||
8. **Training Bias Towards New Tokens:** Our training data was biased to include more texts with new tokens, for effective learning.
|
||||
|
||||
This rigorous approach ensured a comprehensive and contextually rich Korean vocabulary for the model.
|
||||
|
||||
## Citation
|
||||
|
||||
```
|
||||
@misc{kim2024efficient,
|
||||
title={Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models},
|
||||
author={Seungduk Kim and Seungtaek Choi and Myeongho Jeong},
|
||||
year={2024},
|
||||
eprint={2402.14714},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
31
config.json
Normal file
31
config.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"_name_or_path": "yanolja/EEVE-Korean-10.8B-v1.0",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 32000,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 16384,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 48,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": {
|
||||
"factor": 4.0,
|
||||
"type": "linear"
|
||||
},
|
||||
"rope_theta": 10000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.34.0",
|
||||
"use_cache": false,
|
||||
"vocab_size": 40960
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 32000,
|
||||
"transformers_version": "4.34.0",
|
||||
"use_cache": false
|
||||
}
|
||||
3
pytorch_model-00001-of-00003.bin
Normal file
3
pytorch_model-00001-of-00003.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:57ed18107fdcb2fa4dbaa0e01d8bb7612f4f82aef8793a3d6cbdc028fd2535f0
|
||||
size 9982876968
|
||||
3
pytorch_model-00002-of-00003.bin
Normal file
3
pytorch_model-00002-of-00003.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1e21f16436b0ab7cb0ec98d682a35f9dc9fca496ecb552680ff7baae8e3fa470
|
||||
size 9982894110
|
||||
3
pytorch_model-00003-of-00003.bin
Normal file
3
pytorch_model-00003-of-00003.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:261e0f5ac60e3fc981ff9f3eda19f9541c21029bd7ff07363a9e6e5c210245eb
|
||||
size 1644234996
|
||||
442
pytorch_model.bin.index.json
Normal file
442
pytorch_model.bin.index.json
Normal file
@@ -0,0 +1,442 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 21609848832
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.32.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.33.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.34.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.35.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.36.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.37.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.38.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.39.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.40.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.40.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.41.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.42.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.43.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.44.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
|
||||
"model.layers.45.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.45.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.46.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.47.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
|
||||
"model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
|
||||
"model.norm.weight": "pytorch_model-00003-of-00003.bin"
|
||||
}
|
||||
}
|
||||
12
special_tokens_map.json
Normal file
12
special_tokens_map.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<unk>",
|
||||
"<s>",
|
||||
"</s>",
|
||||
"<|im_end|>"
|
||||
],
|
||||
"bos_token": "<s>",
|
||||
"eos_token": "<|im_end|>",
|
||||
"pad_token": "</s>",
|
||||
"unk_token": "<unk>"
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:02b0589ffe8224ef1f6992363681886963e53cdc69e11a8bfdf1e8742bd6cbb4
|
||||
size 2175507
|
||||
57
tokenizer_config.json
Normal file
57
tokenizer_config.json
Normal file
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"32000": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<unk>",
|
||||
"<s>",
|
||||
"</s>",
|
||||
"<|im_end|>"
|
||||
],
|
||||
"bos_token": "<s>",
|
||||
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful assistant.' %}{% endif %}{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{{'<|im_start|>system\n' + system_message + '<|im_end|>\n'}}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"legacy": true,
|
||||
"model_max_length": 16384,
|
||||
"pad_token": "</s>",
|
||||
"padding_side": "right",
|
||||
"sp_model_kwargs": {},
|
||||
"spaces_between_special_tokens": false,
|
||||
"tokenizer_class": "LlamaTokenizer",
|
||||
"trust_remote_code": false,
|
||||
"unk_token": "<unk>",
|
||||
"use_default_system_prompt": false,
|
||||
"use_fast": true
|
||||
}
|
||||
Reference in New Issue
Block a user