初始化项目,由ModelHub XC社区提供模型

Model: beomi/OPEN-SOLAR-KO-10.7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-10 16:26:33 +08:00
commit e4486f1970
19 changed files with 131073 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

191
LICENSE Normal file
View File

@@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2024 Junbum Lee(Beomi)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

125
README.md Normal file
View File

@@ -0,0 +1,125 @@
---
language:
- ko
- en
pipeline_tag: text-generation
inference: false
tags:
- solar
- mistral
- pytorch
- solar-ko
library_name: transformers
license: apache-2.0
---
**Update Log**
- 2024.01.08: Initial Test version Release of Solar-Ko
# **Open-Solar-Ko** ⭐🇰🇷
Solar-Ko represents an advanced iteration of the upstage/SOLAR-10.7B-v1.0 model, featuring an expanded vocabulary and the inclusion of a Korean corpus for enhanced pretraining.
Open-Solar-Ko exclusively utilizes publicly accessible Korean corpora, including sources such as [AI Hub](https://www.aihub.or.kr), [Modu Corpus, 모두의 말뭉치](https://corpus.korean.go.kr/), and [Korean Wikipedia](https://dumps.wikimedia.org/kowiki/).
As training was conducted solely with publicly available corpora, this model is open for unrestricted use by everyone, adhering to the Apache 2.0 open-source license.
## Model Details
**Model Developers:** Junbum Lee (Beomi)
**Variations:** Solar-Ko is available in one parameter size — 10.7B, as a continually pretrained version.
**Input:** The model accepts only text input.
**Output:** The model produces text output exclusively.
**Model Architecture:**
SOLAR-KO-10.7B is an auto-regressive language model that leverages an optimized transformer architecture derived from Llama-2.
| |Training Data|Parameters|Content Length|GQA|Tokens|Learning Rate|
|---|---|---|---|---|---|---|
|SOLAR-KO-10.7B|*A curated mix of Publicly Accessible Korean Corpora*|10.7B|4k|O|>15B*|5 × 10<sup>-5</sup>|
**Training Corpus**
The model was trained using selected datasets from AIHub and Modu Corpus. Detailed information about the training datasets is available below:
- AI Hub: [corpus/AI_HUB](./corpus/AI_HUB)
- Only the `Training` segment of the data was used.
- The `Validation` and `Test` segments were deliberately excluded.
- Modu Corpus: [corpus/MODU_CORPUS](./corpus/MODU_CORPUS)
The final JSONL dataset used to train this model is approximately 61GB in size.
Total token count: Approximately 15 billion tokens (*using the expanded tokenizer. With the original SOLAR tokenizer, >60 billion tokens.)
**Vocab Expansion**
| Model Name | Vocabulary Size | Description |
| --- | --- | --- |
| Original Solar | 32000 | Sentencepiece BPE |
| **Expanded SOLAR-KO-10.7B** | 46592 | Sentencepiece BPE. Added Korean vocab and merges |
**Tokenizing "안녕하세요, 오늘은 날씨가 좋네요."**
- SOLAR-10.7B: 26 tokens
- SOLAR-KO-10.7B: 8 tokens
| Model | Tokens |
| --- | --- |
| SOLAR-10.7B | `['▁', '안', '<0xEB>', '<0x85>', '<0x95>', '하', '세', '요', ',', '▁', '오', '<0xEB>', '<0x8A>', '<0x98>', '은', '▁', '날', '<0xEC>', '<0x94>', '<0xA8>', '가', '▁', '좋', '네', '요', '.']` |
| SOLAR-KO-10.7B | `['▁안녕', '하세요', ',', '▁오늘은', '▁날', '씨가', '▁좋네요', '.']` |
**Tokenizing "Meet 10.7B Solar: Elevating Performance with Upstage Depth UP Scaling!"**
- SOLAR-10.7B: 22 tokens
- SOLAR-KO-10.7B: 22 tokens
| Model | Tokens |
| --- | --- |
| SOLAR-10.7B | `['▁Meet', '▁', '1', '0', '.', '7', 'B', '▁Solar', ':', '▁E', 'lev', 'ating', '▁Performance', '▁with', '▁Up', 'stage', '▁Dep', 'th', '▁UP', '▁Scal', 'ing', '!']` |
| SOLAR-KO-10.7B | `['▁Meet', '▁', '1', '0', '.', '7', 'B', '▁Solar', ':', '▁E', 'lev', 'ating', '▁Performance', '▁with', '▁Up', 'stage', '▁Dep', 'th', '▁UP', '▁Scal', 'ing', '!']` |
# LICENSE
Apache 2.0
# **Model Benchmark**
## LM Eval Harness - Korean (polyglot branch)
- Used EleutherAI's lm-evaluation-harness https://github.com/EleutherAI/lm-evaluation-harness/tree/polyglot
| | 0 | 5 | 10 | 50 |
|:---------------------------------|---------:|---------:|---------:|---------:|
| kobest_boolq (macro_f1) | 0.853949 | 0.88098 | 0.898139 | 0.902354 |
| kobest_copa (macro_f1) | 0.804531 | 0.826736 | 0.837656 | 0.860899 |
| kobest_hellaswag (macro_f1) | 0.507174 | 0.500983 | 0.487287 | 0.512182 |
| kobest_sentineg (macro_f1) | 0.3517 | 0.972291 | 0.977321 | 0.984884 |
| kohatespeech (macro_f1) | 0.258111 | 0.403957 | 0.386808 | 0.462393 |
| kohatespeech_apeach (macro_f1) | 0.337667 | 0.651697 | 0.705337 | 0.827757 |
| kohatespeech_gen_bias (macro_f1) | 0.124535 | 0.503464 | 0.498501 | 0.443218 |
| korunsmile (f1) | 0.3814 | 0.356939 | 0.369989 | 0.296193 |
| nsmc (acc) | 0.5356 | 0.87162 | 0.88654 | 0.89632 |
| pawsx_ko (acc) | 0.5435 | 0.5245 | 0.5315 | 0.5385 |
## Citation
```
@misc {solar_ko_junbum_2023,
author = { {L. Junbum} },
title = { Solar-Ko-10.7b },
year = 2024,
url = { https://huggingface.co/beomi/SOLAR-KO-10.7B },
publisher = { Hugging Face }
}
```
## Acknowledgements
- Training support was provided by the [TPU Research Cloud](https://sites.research.google/trc/) program.
- The training corpus includes data from [AI Hub](https://www.aihub.or.kr/), [Modu Corpus](https://corpus.korean.go.kr/), and [Korean Wikipedia](https://dumps.wikimedia.org/kowiki/).

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "beomi/SOLAR-KO-10.7B-DEV",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 4096,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 48,
"num_key_value_heads": 8,
"pad_token_id": 2,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.36.2",
"use_cache": true,
"vocab_size": 46592
}

50
corpus/AI_HUB Normal file
View File

@@ -0,0 +1,50 @@
754M ./001.문서요약.jsonl
397M ./006.전문분야한영.jsonl
486M ./016.행정_문서_대상_기계독해_데이터.jsonl
563M ./017.뉴스_기사_기계독해_데이터.jsonl
1.2G ./018.논문자료_요약_데이터.jsonl
88M ./019.법률,_규정_(판결서,_약관_등)_텍스트_분석_데이터.jsonl
75M ./020.주제별_텍스트_일상_대화_데이터.jsonl
265M ./021.도서자료_기계독해.jsonl
30M ./021.용도별_목적대화_데이터.jsonl
566M ./022.요약문_및_레포트_생성_데이터.jsonl
19G ./023.전문분야_말뭉치_데이터(분야별_개체명_인식_포함).jsonl
253M ./023.방송_콘텐츠_대본_요약_데이터.jsonl
918M ./025.일상생활_및_구어체_한-영_번역_병렬_말뭉치_데이터.jsonl
307M ./026.한국어-영어_번역_말뭉치_1.jsonl
1.3G ./026.기술과학_분야_한-영_번역_병렬_말뭉치_데이터.jsonl
309M ./027.한국어-중국어_번역_말뭉치_1.jsonl
347M ./027.한국어-영어_번역_말뭉치_2.jsonl
538M ./027.일상생활_및_구어체_한-중,_한-일_번역_병렬_말뭉치_데이터.jsonl
276M ./028.한국어-중국어_번역_말뭉치_2.jsonl
300M ./028.다국어_구어체_번역_병렬_말뭉치_데이터.jsonl
410M ./029.한국어-일본어_번역_말뭉치.jsonl
542K ./029.대규모_구매도서_기반_한국어_말뭉치_데이터.jsonl
9.9G ./030.웹데이터_기반_한국어_말뭉치_데이터.jsonl
1.4G ./031.온라인_구어체_말뭉치_데이터.jsonl
258M ./032.방송콘텐츠_한국어-영어_번역_말뭉치.jsonl
84M ./032.특허_분야_자동분류_데이터.jsonl
239M ./034.방송콘텐츠_한국어-유럽어_번역_말뭉치.jsonl
65M ./044.페르소나_대화.jsonl
56M ./045.지식검색_대화.jsonl
67M ./046.공감형_대화.jsonl
85M ./049.일반상식_문장_생성_평가_데이터.jsonl
13M ./050.발화유형(문어,구어,채팅)별_기계번역_병렬_말뭉치.jsonl
193K ./052.기계번역_품질_검증_데이터.jsonl
118M ./053.한국어-다국어(영어_제외)_번역_말뭉치(기술과학).jsonl
127M ./054.한국어-다국어_번역_말뭉치(기초과학).jsonl
67M ./055.한국어-다국어_번역_말뭉치(인문학).jsonl
205M ./11.기계독해.jsonl
259M ./141.한국어_멀티세션_대화.jsonl
248M ./142.한국어_지식기반_관계_데이터.jsonl
108M ./143.민원_업무_효율,_자동화를_위한_언어_AI_학습데이터.jsonl
2.4G ./146.낚시성_기사_탐지_데이터.jsonl
23M ./147.텍스트_윤리검증_데이터.jsonl
632M ./153.기술과학_요약_데이터.jsonl
962M ./155.산업정보_연계_주요국_특허_영-한_데이터.jsonl
1.1G ./156.전문분야_영-한,_중-한_번역_말뭉치(식품).jsonl
236M ./157.방송_콘텐츠_한-중,_한-일_번역_병렬_말뭉치_데이터.jsonl
418M ./157.추상_요약_사실성_검증_데이터.jsonl
12M ./158.시간_표현_탐지_데이터.jsonl
17M ./159.문장_유형(추론,_예측_등)_판단_데이터.jsonl
1.4G ./297.SNS_데이터_고도화.jsonl

6
corpus/MODU_CORPUS Normal file
View File

@@ -0,0 +1,6 @@
일상대화말뭉치 2020, 2021
신문 말뭉치 2020, 2021, 2022
유사 문장 말뭉치
문서 요약 말뭉치
문어 말뭉치
의미역 분석 말뭉치

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 2,
"transformers_version": "4.36.2"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9cd1cbc109a254736e4c703f4d8f5043ed75f004e59f2f6186ecbc32fa8972ee
size 2999032032

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b112674c880e1fe25de44549e5eb7ab8d3ce0d03025e34ade1ec907e6c8f7383
size 2936118064

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac89a2004030539d650cfd62d022297b70418b225eb0b81cf47629a262a8dfd9
size 2936134712

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75f2edf11bc5c3cd78b1b11c138a2e4159f92ac8be62b98b5aade2ece596d7c3
size 2936134720

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d7294048ca3942f4573cf7722850cc6359abd28a7876674582b2e17c442f08a5
size 2969688800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87bc1a2dd6caa45b8007d69030ca8ae2123e052645d4329de4252292facb6e8f
size 2936118096

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a1d608dd6efcc6f9310a7d9f91b322854506e714dfe0474717c141d874aa4b62
size 2936134712

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d84806648e07633333bf2d6e3f9d325426ebd4b35a9881a0e2ec4908ae06d1a5
size 1052812976

View File

@@ -0,0 +1,442 @@
{
"metadata": {
"total_size": 21702123520
},
"weight_map": {
"lm_head.weight": "model-00008-of-00008.safetensors",
"model.embed_tokens.weight": "model-00001-of-00008.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.13.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.input_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.19.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.20.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.input_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.26.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
"model.layers.27.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.30.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.input_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
"model.layers.33.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.input_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.40.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.40.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.40.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.40.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.41.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.41.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.41.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.42.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.42.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.42.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.43.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.43.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.43.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.44.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.44.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.44.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.input_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.45.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
"model.layers.45.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.45.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.46.input_layernorm.weight": "model-00008-of-00008.safetensors",
"model.layers.46.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.46.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.46.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.46.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
"model.layers.46.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.46.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.46.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.46.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
"model.layers.47.input_layernorm.weight": "model-00008-of-00008.safetensors",
"model.layers.47.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
"model.layers.47.self_attn.k_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.self_attn.o_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.self_attn.q_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.47.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
"model.layers.6.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
"model.norm.weight": "model-00008-of-00008.safetensors"
}
}

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

126180
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

3961
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff