初始化项目,由ModelHub XC社区提供模型

Model: maywell/EEVE-Korean-Instruct-10.8B-v1.0-32k
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-22 12:44:12 +08:00
commit 8c88209a14
13 changed files with 774 additions and 0 deletions

52
.gitattributes vendored Normal file
View File

@@ -0,0 +1,52 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
pytorch_model-00002-of-00003.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00003-of-00003.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00001-of-00003.bin filter=lfs diff=lfs merge=lfs -text

View File

@@ -0,0 +1,31 @@
{
"_name_or_path": "maywell/EEVE-Korean-Instruct-10.8B-v1.0-32k",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 32000,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 48,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 8.0,
"type": "linear"
},
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.34.0",
"use_cache": false,
"vocab_size": 40960
}

129
README.md Normal file
View File

@@ -0,0 +1,129 @@
---
license: apache-2.0
base_model: yanolja/EEVE-Korean-Instruct-10.8B-v1.0
---
# Original Model Card
[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<p align="left">
<img src="https://huggingface.co/yanolja/EEVE-Korean-Instruct-10.8B-v1.0/resolve/main/eeve_logo.webp" width="50%"/>
</p>
# EEVE-Korean-Instruct-10.8B-v1.0
## Join Our Community on Discord!
If you're passionate about the field of Large Language Models and wish to exchange knowledge and insights, we warmly invite you to join our Discord server. It's worth noting that Korean is the primary language used in this server. The landscape of LLMs is evolving rapidly, and without active sharing, our collective knowledge risks becoming outdated swiftly. Let's collaborate and drive greater impact together! Join us here: [Discord Link](https://discord.gg/b27bAHg95m).
## Our Dedicated Team (Alphabetical Order)
| Research | Engineering | Product Management | UX Design |
|-----------------|-----------------|--------------------|--------------|
| Myeongho Jeong | Geon Kim | Bokyung Huh | Eunsue Choi |
| Seungduk Kim | Rifqi Alfi | | |
| Seungtaek Choi | Sanghoon Han | | |
| | Suhyun Kang | | |
## About the Model
This model is a fine-tuned version of [yanolja/EEVE-Korean-10.8B-v1.0](https://huggingface.co/yanolja/EEVE-Korean-10.8B-v1.0), which is a Korean vocabulary-extended version of [upstage/SOLAR-10.7B-v1.0](https://huggingface.co/upstage/SOLAR-10.7B-v1.0). Specifically, we utilized Direct Preference Optimization (DPO) through the use of [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl).
For more details, please refer to our technical report: [Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models](https://arxiv.org/abs/2402.14714).
## Prompt Template
```
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
Human: {prompt}
Assistant:
```
## How to Use it
```python
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("yanolja/EEVE-Korean-Instruct-10.8B-v1.0")
tokenizer = AutoTokenizer.from_pretrained("yanolja/EEVE-Korean-Instruct-10.8B-v1.0")
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'
model_inputs = tokenizer(prompt_template.format(prompt=text), return_tensors='pt')
outputs = model.generate(**model_inputs, max_new_tokens=256)
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(output_text)
```
### Example Output
```
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
Human: 한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.
(A) 경성
(B) 부산
(C) 평양
(D) 서울
(E) 전주
Assistant:
(D) 서울이 한국의 수도입니다. 서울은 나라의 북동부에 위치해 있으며, 정치, 경제, 문화의 중심지입니다. 약 1,000만 명이 넘는 인구를 가진 세계에서 가장 큰 도시 중 하나입니다. 서울은 높은 빌딩, 현대적인 인프라, 활기 문화 장면으로 유명합니다. 또한, 많은 역사적 명소와 박물관이 있어 방문객들에게 풍부한 문화 체험을 제공합니다.
```
### Training Data
- Korean-translated version of [Open-Orca/SlimOrca-Dedup](https://huggingface.co/datasets/Open-Orca/SlimOrca-Dedup)
- Korean-translated version of [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned)
- No other dataset was used
## Citation
```
@misc{kim2024efficient,
title={Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models},
author={Seungduk Kim and Seungtaek Choi and Myeongho Jeong},
year={2024},
eprint={2402.14714},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
```
@misc{cui2023ultrafeedback,
title={UltraFeedback: Boosting Language Models with High-quality Feedback},
author={Ganqu Cui and Lifan Yuan and Ning Ding and Guanming Yao and Wei Zhu and Yuan Ni and Guotong Xie and Zhiyuan Liu and Maosong Sun},
year={2023},
eprint={2310.01377},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
```
@misc{SlimOrcaDedup,
title = {SlimOrca Dedup: A Deduplicated Subset of SlimOrca},
author = {Wing Lian and Guan Wang and Bleys Goodson and Eugene Pentland and Austin Cook and Chanvichet Vong and "Teknium" and Nathan Hoos},
year = {2023},
publisher = {HuggingFace},
url = {https://huggingface.co/datasets/Open-Orca/SlimOrca-Dedup/}
}
```
```
@misc{mukherjee2023orca,
title={Orca: Progressive Learning from Complex Explanation Traces of GPT-4},
author={Subhabrata Mukherjee and Arindam Mitra and Ganesh Jawahar and Sahaj Agarwal and Hamid Palangi and Ahmed Awadallah},
year={2023},
eprint={2306.02707},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_yanolja__EEVE-Korean-Instruct-10.8B-v1.0)
| Metric |Value|
|---------------------------------|----:|
|Avg. |66.48|
|AI2 Reasoning Challenge (25-Shot)|64.85|
|HellaSwag (10-Shot) |83.04|
|MMLU (5-Shot) |64.23|
|TruthfulQA (0-shot) |54.09|
|Winogrande (5-shot) |81.93|
|GSM8k (5-shot) |50.72|

31
config.json Normal file
View File

@@ -0,0 +1,31 @@
{
"_name_or_path": "maywell/EEVE-Korean-Instruct-10.8B-v1.0-32k",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 32000,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 48,
"num_key_value_heads": 8,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"factor": 8.0,
"type": "linear"
},
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.34.0",
"use_cache": false,
"vocab_size": 40960
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 32000,
"transformers_version": "4.34.0",
"use_cache": false
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4daced14b522b148879db6e2d031ad6861aa5b1f1243fca435c5fa83a536f6b
size 9982876968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bd5c5a256d68e63d6ae78ce414f963000d91acb7b1b68db0dfdf7da3e800d256
size 9982894110

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4440c8d84365fb0821c18c79733696c68fae6ba43dc20bb82e555f29c177e1c1
size 1644234996

View File

@@ -0,0 +1,442 @@
{
"metadata": {
"total_size": 21609848832
},
"weight_map": {
"lm_head.weight": "pytorch_model-00003-of-00003.bin",
"model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.32.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.33.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.34.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.35.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.36.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.37.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.38.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.39.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.40.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.40.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.41.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.42.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.43.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.44.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
"model.layers.45.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.45.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.46.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.47.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
"model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
"model.norm.weight": "pytorch_model-00003-of-00003.bin"
}
}

12
special_tokens_map.json Normal file
View File

@@ -0,0 +1,12 @@
{
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>",
"<|im_end|>"
],
"bos_token": "<s>",
"eos_token": "<|im_end|>",
"pad_token": "</s>",
"unk_token": "<unk>"
}

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02b0589ffe8224ef1f6992363681886963e53cdc69e11a8bfdf1e8742bd6cbb4
size 2175507

57
tokenizer_config.json Normal file
View File

@@ -0,0 +1,57 @@
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>",
"<|im_end|>"
],
"bos_token": "<s>",
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful assistant.' %}{% endif %}{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{{'<|im_start|>system\n' + system_message + '<|im_end|>\n'}}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"legacy": true,
"model_max_length": 32768,
"pad_token": "</s>",
"padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"trust_remote_code": false,
"unk_token": "<unk>",
"use_default_system_prompt": false,
"use_fast": true
}