初始化项目,由ModelHub XC社区提供模型
Model: yanolja/YanoljaNEXT-EEVE-Instruct-2.8B Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
127
README.md
Normal file
127
README.md
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
base_model: yanolja/EEVE-Korean-2.8B-v1.0
|
||||||
|
model-index:
|
||||||
|
- name: yanolja/EEVE-Korean-Instruct-2.8B-v1.0
|
||||||
|
results: []
|
||||||
|
---
|
||||||
|
[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
|
||||||
|
|
||||||
|
# EEVE-Korean-Instruct-2.8B-v1.0
|
||||||
|
|
||||||
|
## Join Our Community on Discord!
|
||||||
|
|
||||||
|
If you're passionate about the field of Large Language Models and wish to exchange knowledge and insights, we warmly invite you to join our Discord server. It's worth noting that Korean is the primary language used in this server. The landscape of LLMs is evolving rapidly, and without active sharing, our collective knowledge risks quickly becoming outdated. Let's collaborate and drive greater impact together! Join us here: [Discord Link](https://discord.gg/b27bAHg95m).
|
||||||
|
|
||||||
|
## Our Dedicated Team (Alphabetical Order)
|
||||||
|
| Research | Engineering | Product Management | UX Design |
|
||||||
|
|-----------------|-----------------|--------------------|--------------|
|
||||||
|
| Myeongho Jeong | Geon Kim | Bokyung Huh | Eunsue Choi |
|
||||||
|
| Seungduk Kim | Rifqi Alfi | | |
|
||||||
|
| Seungtaek Choi | Sanghoon Han | | |
|
||||||
|
| | Suhyun Kang | | |
|
||||||
|
|
||||||
|
## About the Model
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [yanolja/EEVE-Korean-2.8B-v1.0](https://huggingface.co/yanolja/EEVE-Korean-2.8B-v1.0), which is a Korean vocabulary-extended version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2). Specifically, we applied Direct Preference Optimization (DPO) using [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl).
|
||||||
|
|
||||||
|
For more details, please refer to our technical report: [Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models](https://arxiv.org/abs/2402.14714).
|
||||||
|
|
||||||
|
## Prompt Template
|
||||||
|
```
|
||||||
|
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
|
||||||
|
Human: {prompt}
|
||||||
|
Assistant:
|
||||||
|
```
|
||||||
|
## How to Use It
|
||||||
|
```python
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
from transformers import AutoModelForCausalLM
|
||||||
|
|
||||||
|
model = AutoModelForCausalLM.from_pretrained("yanolja/EEVE-Korean-Instruct-2.8B-v1.0", trust_remote_code=True)
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained("yanolja/EEVE-Korean-Instruct-2.8B-v1.0", trust_remote_code=True)
|
||||||
|
|
||||||
|
prompt_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nHuman: {prompt}\nAssistant:\n"
|
||||||
|
text = '한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.\n\n(A) 경성\n(B) 부산\n(C) 평양\n(D) 서울\n(E) 전주'
|
||||||
|
model_inputs = tokenizer(prompt_template.format(prompt=text), return_tensors='pt')
|
||||||
|
|
||||||
|
outputs = model.generate(**model_inputs, max_new_tokens=256)
|
||||||
|
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
||||||
|
print(output_text)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example Output
|
||||||
|
```
|
||||||
|
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
|
||||||
|
Human: 한국의 수도는 어디인가요? 아래 선택지 중 골라주세요.
|
||||||
|
|
||||||
|
(A) 경성
|
||||||
|
(B) 부산
|
||||||
|
(C) 평양
|
||||||
|
(D) 서울
|
||||||
|
(E) 전주
|
||||||
|
Assistant:
|
||||||
|
한국의 수도는 (D) 서울입니다. 서울은 수도권과 수도권 내의 주요 도시들을 포함하는 광역 행정구역으로, 대한민국의 수도입니다. 서울은 수도권 인구의 약 70%를 차지하며, 대한민국의 경제, 정치, 문화의 중심지입니다.
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Training Data
|
||||||
|
- Korean-translated version of [Open-Orca/SlimOrca-Dedup](https://huggingface.co/datasets/Open-Orca/SlimOrca-Dedup)
|
||||||
|
- Korean-translated version of [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned)
|
||||||
|
- No other dataset was used
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
```
|
||||||
|
@misc{kim2024efficient,
|
||||||
|
title={Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models},
|
||||||
|
author={Seungduk Kim and Seungtaek Choi and Myeongho Jeong},
|
||||||
|
year={2024},
|
||||||
|
eprint={2402.14714},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CL}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
```
|
||||||
|
@misc{cui2023ultrafeedback,
|
||||||
|
title={UltraFeedback: Boosting Language Models with High-quality Feedback},
|
||||||
|
author={Ganqu Cui and Lifan Yuan and Ning Ding and Guanming Yao and Wei Zhu and Yuan Ni and Guotong Xie and Zhiyuan Liu and Maosong Sun},
|
||||||
|
year={2023},
|
||||||
|
eprint={2310.01377},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CL}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
```
|
||||||
|
@misc{SlimOrcaDedup,
|
||||||
|
title = {SlimOrca Dedup: A Deduplicated Subset of SlimOrca},
|
||||||
|
author = {Wing Lian and Guan Wang and Bleys Goodson and Eugene Pentland and Austin Cook and Chanvichet Vong and "Teknium" and Nathan Hoos},
|
||||||
|
year = {2023},
|
||||||
|
publisher = {HuggingFace},
|
||||||
|
url = {https://huggingface.co/datasets/Open-Orca/SlimOrca-Dedup/}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
```
|
||||||
|
@misc{mukherjee2023orca,
|
||||||
|
title={Orca: Progressive Learning from Complex Explanation Traces of GPT-4},
|
||||||
|
author={Subhabrata Mukherjee and Arindam Mitra and Ganesh Jawahar and Sahaj Agarwal and Hamid Palangi and Ahmed Awadallah},
|
||||||
|
year={2023},
|
||||||
|
eprint={2306.02707},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CL}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||||
|
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_yanolja__EEVE-Korean-Instruct-2.8B-v1.0)
|
||||||
|
|
||||||
|
| Metric |Value|
|
||||||
|
|---------------------------------|----:|
|
||||||
|
|Avg. |58.71|
|
||||||
|
|AI2 Reasoning Challenge (25-Shot)|58.28|
|
||||||
|
|HellaSwag (10-Shot) |72.42|
|
||||||
|
|MMLU (5-Shot) |53.35|
|
||||||
|
|TruthfulQA (0-shot) |48.32|
|
||||||
|
|Winogrande (5-shot) |74.82|
|
||||||
|
|GSM8k (5-shot) |45.11|
|
||||||
|
|
||||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "yanolja/EEVE-Korean-Instruct-2.8B-v1.0",
|
||||||
|
"architectures": [
|
||||||
|
"PhiForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 58940,
|
||||||
|
"embd_pdrop": 0.0,
|
||||||
|
"eos_token_id": 58943,
|
||||||
|
"hidden_act": "gelu_new",
|
||||||
|
"hidden_size": 2560,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 10240,
|
||||||
|
"layer_norm_eps": 1e-05,
|
||||||
|
"max_position_embeddings": 2048,
|
||||||
|
"model_type": "phi",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 32,
|
||||||
|
"partial_rotary_factor": 0.4,
|
||||||
|
"qk_layernorm": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"transformers_version": "4.38.0.dev0",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 58944
|
||||||
|
}
|
||||||
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:83547e25604779e58059faa6d5828dbd893ab59b56f8430c50887f4b709f5933
|
||||||
|
size 4969676992
|
||||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:cd1cde0c2976d95dbfd754ffdd3c4cf3e74261a42d9749f5febb72a3adffcb9a
|
||||||
|
size 669054904
|
||||||
460
model.safetensors.index.json
Normal file
460
model.safetensors.index.json
Normal file
@@ -0,0 +1,460 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 5638681728
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"lm_head.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.final_layernorm.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.final_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.fc2.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.fc2.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.fc1.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.fc2.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.fc2.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.dense.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.dense.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.fc1.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.fc1.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.fc2.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.fc2.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.dense.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.dense.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.dense.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.dense.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
116319
tokenizer.json
Normal file
116319
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
53
tokenizer_config.json
Normal file
53
tokenizer_config.json
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"50256": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"58940": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"58941": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"58943": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [],
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful assistant.' %}{% endif %}{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{{'<|im_start|>system\n' + system_message + '<|im_end|>\n'}}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|im_end|>",
|
||||||
|
"legacy": true,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"trust_remote_code": false,
|
||||||
|
"unk_token": "<|endoftext|>",
|
||||||
|
"use_default_system_prompt": false,
|
||||||
|
"use_fast": true
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user