初始化项目,由ModelHub XC社区提供模型
Model: adamo1139/Yi-34B-AEZAKMI-v1 Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
324
LICENSE
Normal file
324
LICENSE
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
Yi Series Models Community License Agreement
|
||||||
|
Version: 2.1
|
||||||
|
Date of Release: November 23, 2023
|
||||||
|
|
||||||
|
1. Definition
|
||||||
|
|
||||||
|
“Agreement” refers to the terms and conditions defined in this Yi Series Models
|
||||||
|
Community License Agreement for the use, reproduction and distribution of Yi
|
||||||
|
Series Models.
|
||||||
|
|
||||||
|
“Model” refers to associated components (including checkpoints) developed based
|
||||||
|
on machine learning, including learned weights and parameters (including the
|
||||||
|
status of optimizer).
|
||||||
|
|
||||||
|
“Yi Series Models” refers to opensource models with different specifications and
|
||||||
|
capabilities named “Yi” provided by the Licensor, including Yi-6B, Yi-34B etc.
|
||||||
|
|
||||||
|
“Derivatives” refers to all modifications to Yi Series Models, work based on Yi
|
||||||
|
Series Models, or any other models created or initialized by transferring the
|
||||||
|
weights, parameters, activations, or output patterns of Yi Series Models to
|
||||||
|
other models to achieve similar performance, including but not limited to
|
||||||
|
methods that require using intermediate data representations or generating
|
||||||
|
synthetic data based on Yi Series Models to train other models.
|
||||||
|
|
||||||
|
“Licensor” refers to Beijing Lingyiwanwu Information Technology Co., Ltd.
|
||||||
|
|
||||||
|
“you” refers to an individual or legal entity that exercises the license granted
|
||||||
|
by this Agreement and/or uses the Yi Series Models for any purpose and in any
|
||||||
|
field of use.
|
||||||
|
|
||||||
|
“Third Party” refers to any individuals, legal entities or non-legal
|
||||||
|
organizations other than you.
|
||||||
|
|
||||||
|
“Distribute” refers to transmitting, copying, publishing, or otherwise sharing
|
||||||
|
the Yi Series Models with third parties, including providing the Yi Series
|
||||||
|
Models through electronic or other remote means (such as any SaaS software or
|
||||||
|
PaaS software accessed via API or web access).
|
||||||
|
|
||||||
|
“Commercial Purposes” refers to the use of the Yi Series Models, directly or
|
||||||
|
indirectly, for the operation, promotion, revenue generation, or any other
|
||||||
|
profit-making purposes for entities or individuals.
|
||||||
|
|
||||||
|
“Laws and Regulations” refers to the laws and administrative regulations of the
|
||||||
|
mainland of the People's Republic of China (for the purposes of this Agreement
|
||||||
|
only, excluding Hong Kong, Macau, and Taiwan).
|
||||||
|
|
||||||
|
“Personal Information” refers to various information related to identified or
|
||||||
|
identifiable natural persons recorded electronically or by other means,
|
||||||
|
excluding information that has been anonymized.
|
||||||
|
|
||||||
|
“Logo” refers to any trademark, service mark, trade name, domain name, website
|
||||||
|
name, or other distinctive branding marks.
|
||||||
|
|
||||||
|
2. License and License Restrictions
|
||||||
|
The Licensor hereby grants you a non-exclusive, global, non-transferable,
|
||||||
|
non-sub-licensable, revocable, and royalty-free copyright license. You must
|
||||||
|
adhere to the following license restrictions:
|
||||||
|
|
||||||
|
1) Your use of the Yi Series Models must comply with the Laws and Regulations as
|
||||||
|
well as applicable legal requirements of other countries/regions, and respect
|
||||||
|
social ethics and moral standards, including but not limited to, not using the
|
||||||
|
Yi Series Models for purposes prohibited by Laws and Regulations as well as
|
||||||
|
applicable legal requirements of other countries/regions, such as harming
|
||||||
|
national security, promoting terrorism, extremism, inciting ethnic or racial
|
||||||
|
hatred, discrimination, violence, or pornography, and spreading false harmful
|
||||||
|
information.
|
||||||
|
|
||||||
|
2) You shall not, for military or unlawful purposes or in ways not allowed by
|
||||||
|
Laws and Regulations as well as applicable legal requirements of other
|
||||||
|
countries/regions, a) use, copy or Distribute the Yi Series Models, or b) create
|
||||||
|
complete or partial Derivatives of the Yi Series Models.
|
||||||
|
|
||||||
|
3) Your use of the Yi Series Models (including using the output of the Yi Series
|
||||||
|
Models) and the creation of Derivatives must not infringe upon the legitimate
|
||||||
|
rights of any Third Party, including but not limited to the rights of personal
|
||||||
|
rights such as the right to likeness, reputation, and privacy, as well as
|
||||||
|
intellectual property rights such as copyrights, patents, trade secrets, and
|
||||||
|
other property rights.
|
||||||
|
|
||||||
|
4) You must clearly attribute the source of the Yi Series Models to the Licensor
|
||||||
|
and provide a copy of this Agreement to any Third-Party users of the Yi Series
|
||||||
|
Models and Derivatives.
|
||||||
|
|
||||||
|
5) If you modify the Yi Series Models to create Derivatives, you must clearly
|
||||||
|
indicate the substantial modifications made, and these modifications shall not
|
||||||
|
violate the license restrictions of this Agreement. You shall not enable,
|
||||||
|
assist, or in any way facilitate Third Parties to violate the license
|
||||||
|
restrictions of this Agreement.
|
||||||
|
|
||||||
|
If you plan to use the Yi Series Models and Derivatives for Commercial Purposes,
|
||||||
|
please refer to the Registration Form of Yi Series Models for Commercial Purposes
|
||||||
|
(“Registration Form”), as provided in Attachment 1 of the Yi Series Models
|
||||||
|
Commercial License Agreement (available at https://www.lingyiwanwu.com/yi-license)
|
||||||
|
and send completed Registration Form to the email: yi@01.ai to complete the
|
||||||
|
registration and obtain the license for Commercial Purposes. If you obtained the
|
||||||
|
license for Commercial Purposes and use the Yi Series Models and Derivatives for
|
||||||
|
Commercial Purposes, you must comply with the afore-mentioned license restrictions
|
||||||
|
and restrictions specified under the Yi Series Models Commercial License Agreement.
|
||||||
|
|
||||||
|
|
||||||
|
3. Intellectual Property
|
||||||
|
The ownership of the Yi Series Models and their related intellectual property
|
||||||
|
rights is solely held by the Licensor.
|
||||||
|
|
||||||
|
In any circumstance, without the prior written consent of the Licensor, you are
|
||||||
|
not allowed to use any Logo associated with the Licensor. If your use of
|
||||||
|
Licensor's Logo in violation of this Agreement causes any losses to the Licensor
|
||||||
|
or others, you will bear full legal responsibility.
|
||||||
|
|
||||||
|
|
||||||
|
4. Disclaimer and Limitation of Liability
|
||||||
|
|
||||||
|
The Yi Series Models are provided "AS IS." The Licensor does not provide any
|
||||||
|
express or implied warranties for the Yi Series Models, including but not
|
||||||
|
limited to stability, ownership, merchantability, non-infringement, or fitness
|
||||||
|
for a specific purpose of the Yi Series Models and their output results. You
|
||||||
|
assume all responsibilities for the risks and consequences arising from the use,
|
||||||
|
reproduction, distribution of the Yi Series Models, and the creation of
|
||||||
|
Derivatives.
|
||||||
|
|
||||||
|
The Licensor complies with Laws and Regulations at all stages of model training,
|
||||||
|
maintaining the legality, authenticity, accuracy, objectivity, and diversity of
|
||||||
|
data and algorithms. The Licensor is not liable for any direct, indirect,
|
||||||
|
incidental consequences, and other losses or damages related to your use,
|
||||||
|
reproduction, and distribution of the Yi Series Models, and the creation of
|
||||||
|
Derivatives under this Agreement. This includes but is not limited to:
|
||||||
|
|
||||||
|
1) The Licensor is not responsible for data security risks resulting from your
|
||||||
|
use of the Yi Series Models.
|
||||||
|
|
||||||
|
2) The Yi Series Models may contain Personal Information. When you use Yi Series
|
||||||
|
Models, you acknowledge that you are the data processing entity as defined under
|
||||||
|
the Laws and Regulations responsible for determining the processing methods and
|
||||||
|
purposes of Personal Information. You must comply with legal requirements for
|
||||||
|
processing any Personal Information that may be contained in the Yi Series
|
||||||
|
Models and assume the associated legal responsibilities, as well as the risks
|
||||||
|
and consequences of processing Personal Information.
|
||||||
|
|
||||||
|
3) The Licensor is not liable for reputation risks arising from your use of the
|
||||||
|
Yi Series Models or the output results of the Yi Series Models.
|
||||||
|
|
||||||
|
4) The Licensor is not liable for intellectual property risks associated with
|
||||||
|
your use of the Yi Series Models’ output results.
|
||||||
|
|
||||||
|
If your use, reproduction, distribution of the Yi Series Models, or the creation
|
||||||
|
of Derivatives result in losses to the Licensor, the Licensor has the right to
|
||||||
|
seek compensation from you. For any claims made by Third Parties against the
|
||||||
|
Licensor related to your use, reproduction, and distribution of the Yi Series
|
||||||
|
Models, or the creation of Derivatives, the Licensor has the right to demand
|
||||||
|
that you defend, compensate, and indemnify the Licensor and protect the Licensor
|
||||||
|
from harm.
|
||||||
|
|
||||||
|
|
||||||
|
5. Dispute Resolution
|
||||||
|
|
||||||
|
The stipulation, effectiveness, interpretation, performance, modification, and
|
||||||
|
termination of the Agreement, the use, copy and Distribute of the Yi Series
|
||||||
|
Models, and dispute resolution associated with your use, copy and distribution
|
||||||
|
shall be governed by the laws of the mainland of the People's Republic of China
|
||||||
|
(for the purposes of this agreement only, excluding Hong Kong, Macau, and
|
||||||
|
Taiwan), and the application of conflict of laws is excluded.
|
||||||
|
|
||||||
|
Any disputes arising from the use, copy or distribution of the Yi Series Models
|
||||||
|
should first be resolved through amicable negotiations. If negotiations fail,
|
||||||
|
legal proceedings should be initiated in the People's Court at the location of
|
||||||
|
the Licensor.
|
||||||
|
|
||||||
|
|
||||||
|
6. Effectiveness and Termination of the Agreement
|
||||||
|
|
||||||
|
Your use of the Yi Series Models signifies that you have read and agreed to be
|
||||||
|
bound by the terms of the Agreement. The Agreement becomes effective from the
|
||||||
|
date of your use of the Yi Series Models and will terminate from the date you
|
||||||
|
cease using the Yi Series Models. If you violate any terms or restrictions in
|
||||||
|
the Agreement, the Licensor reserves the right to terminate the Agreement.
|
||||||
|
|
||||||
|
Upon termination of the Agreement, you must immediately cease using the Yi
|
||||||
|
Series Models. Section 4, "Disclaimer and Limitation of Liability," and Section
|
||||||
|
5, "Dispute Resolution," of this Agreement remain in effect after the
|
||||||
|
termination of this Agreement.
|
||||||
|
|
||||||
|
|
||||||
|
7. Updates to the Agreement and Contact Information
|
||||||
|
|
||||||
|
The Licensor reserves the right to update the Agreement from time to time. The
|
||||||
|
latest version of the Agreement will be posted by the Licensor through
|
||||||
|
https://01.ai.
|
||||||
|
|
||||||
|
For any questions related to licensing and copyright, please contact the
|
||||||
|
Licensor at yi@01.ai.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Yi系列模型社区许可协议
|
||||||
|
版本: 2.1
|
||||||
|
发布日期: 2023年11月23日
|
||||||
|
|
||||||
|
1. 定义
|
||||||
|
|
||||||
|
“协议”是指本协议中定义Yi系列模型使用、复制和分发的条款和条件。
|
||||||
|
|
||||||
|
“模型”是指任何附带的基于机器学习的组件(包括检查点),包括学习的权重、参数(包括优
|
||||||
|
化器状态)。
|
||||||
|
|
||||||
|
“Yi系列模型”是指许可方开源的以Yi命名的不同规格、不同能力的模型,包括
|
||||||
|
Yi-6B、Yi-34B等。
|
||||||
|
|
||||||
|
“模型衍生品”是指对Yi系列模型的所有修改、基于Yi系列模型的工作,或通过将Yi系列模型
|
||||||
|
的权重、参数、激活或输出模式转移到其他模型而创建或初始化的任何其他模型,以使其他
|
||||||
|
模型的性能与Yi系列模型类似,包括但不限于需要使用中间数据表示的提取方法或基于Yi系
|
||||||
|
列模型生成合成数据来训练其他模型的方法。
|
||||||
|
|
||||||
|
“许可方”是指北京零一万物信息技术有限公司。
|
||||||
|
|
||||||
|
“您”是指行使本协议授予的权限和/或出于任何目的和在任何使用领域使用Yi系列模型的个
|
||||||
|
人或法人实体。
|
||||||
|
|
||||||
|
“第三方”是指您之外的任何个人、法人实体或非法人组织。
|
||||||
|
|
||||||
|
“分发”是指向第三方传输、复制、发布或以其他方式共享Yi系列模型,包括将Yi系列模型作
|
||||||
|
为通过电子或其他远程方式(例如基于 API 或 Web 访问的任何 SaaS 软件或 PaaS 软
|
||||||
|
件)提供。
|
||||||
|
|
||||||
|
“商业用途”是指使用Yi系列模型,直接或间接为实体或个人进行运营、推广或产生收入,或
|
||||||
|
用于任何其他盈利目的。
|
||||||
|
|
||||||
|
“法律法规”是指中华人民共和国大陆地区(仅为本协议之目的,不包括香港、澳门和台湾)
|
||||||
|
的法律及行政法规。
|
||||||
|
|
||||||
|
“个人信息”是指以电子或者其他方式记录的与已识别或者可识别的自然人有关的各种信息,
|
||||||
|
不包括匿名化处理后的信息。
|
||||||
|
|
||||||
|
“标识” 是指任何商标、服务标记、商号、域名、网站名称或其他带有显著品牌特征的标记。
|
||||||
|
|
||||||
|
|
||||||
|
2. 许可及许可限制
|
||||||
|
|
||||||
|
许可方特此授予您非排他性、全球性、不可转让、不可再许可、可撤销、免版税的版权许可。
|
||||||
|
您必须满足如下许可限制条件:
|
||||||
|
|
||||||
|
1) 您对Yi系列模型的使用应遵守法律法规以及其他国家/地区适用的法律要求、尊重社会公
|
||||||
|
德和伦理道德。包括但不限于您不得将Yi系列模型用作危害国家安全、宣扬恐怖主义、极端
|
||||||
|
主义,宣扬民族及种族仇恨、歧视,暴力、色情,以及虚假有害信息等法律法规以及其他国
|
||||||
|
家/地区适用的法律要求禁止的目的。
|
||||||
|
|
||||||
|
2) 您不得出于军事或非法目的,或以法律法规以及其他国家/地区适用的法律要求所不允许
|
||||||
|
的方式 a) 使用、复制、或分发Yi系列模型; 或 b) 创建Yi系列模型的全部或部分衍生品。
|
||||||
|
|
||||||
|
3) 您对Yi系列模型的使用(包括使用Yi系列模型的输出)以及模型衍生品的创建不得侵犯
|
||||||
|
任何第三方的合法权益,包括但不限于他人肖像权、名誉权、隐私权等人格权,著作权、专
|
||||||
|
利权、商业秘密等知识产权,或其他财产权益。
|
||||||
|
|
||||||
|
4) 您必须向Yi系列模型及Yi系列模型衍生品的任何第三方使用者明确Yi系列模型的来源为
|
||||||
|
许可方并向其提供本协议的副本。
|
||||||
|
|
||||||
|
5) 若您修改Yi系列模型得到模型衍生品,您必须以显著的方式说明修改的内容,且上述修
|
||||||
|
改不得违反本协议的许可限制条件,也不能允许、协助或以其他方式使得第三方违反本协议
|
||||||
|
中的许可限制条件。
|
||||||
|
|
||||||
|
如果您计划将Yi系列模型及模型衍生品用作商业用途,请参见《Yi系列模型商用许可协议》
|
||||||
|
(参见:https://www.lingyiwanwu.com/yi-license)附件一《Yi系列模型商用登
|
||||||
|
记表》(“登记表”)并将填写完毕的登记表发送至 yi@01.ai 邮箱完成登记即可获得商用
|
||||||
|
许可。若您获得商用许可并将Yi系列模型及模型衍生品用作商业用途,您应满足许可方上述
|
||||||
|
许可限制条件及《Yi系列模型商用许可协议》中的商业许可限制。
|
||||||
|
|
||||||
|
3. 知识产权
|
||||||
|
|
||||||
|
Yi系列模型的所有权及其相关知识产权,由许可方单独所有。
|
||||||
|
|
||||||
|
在任何情况下,未经许可方事先书面同意,您不得以任何方式使用许可方的任何标识。由于
|
||||||
|
您违反本协议使用许可方的标识给许可方或他人造成损失的,由您承担全部法律责任。
|
||||||
|
|
||||||
|
|
||||||
|
4. 免责声明及责任限制
|
||||||
|
|
||||||
|
Yi系列模型按“原样”提供。许可方不对Yi系列模型提供任何明示或暗示的保证,包括但不限
|
||||||
|
于:模型及输出结果的稳定性、所有权、适销性、非侵权性、或特定用途适用性。您将对适
|
||||||
|
用、复制及分发Yi系列模型以及创建模型衍生品所产生的风险与后果承担所有责任。
|
||||||
|
|
||||||
|
许可方在模型训练的所有阶段都遵守法律法规,坚持维护数据和算法的合法、真实、准确、
|
||||||
|
客观和多样性。许可方不对您根据本协议使用、复制及分发Yi系列模型,以及创建模型衍生
|
||||||
|
品而产生或与之相关的任何直接、间接、附带的后果、以及其他损失或损害承担责任。包括
|
||||||
|
但不限于:
|
||||||
|
|
||||||
|
1) 许可方不承担您因使用Yi系列模型而导致的数据安全风险。
|
||||||
|
|
||||||
|
2) Yi系列模型中可能包含个人信息。在您使用Yi系列模型的过程中,您承认您为法律法规
|
||||||
|
定义下决定个人信息处理方式和目的的个人信息处理者。您应遵守法律法规要求处理Yi系列
|
||||||
|
模型中可能包含的个人信息,并承担相应的法律责任,以及处理个人信息的风险和后果。
|
||||||
|
|
||||||
|
3) 许可方不承担您使用Yi系列模型或模型输出结果而产生的声誉风险。
|
||||||
|
|
||||||
|
4) 许可方不承担您使用Yi系列模型的输出结果涉及的知识产权风险。
|
||||||
|
|
||||||
|
若由于您对Yi系列模型的使用、复制或分发,或者创建模型衍生品而导致许可方遭受损失,
|
||||||
|
许可方有权要求您对许可方的损失进行赔偿。对于任何第三方向许可方提出的因您使用、复
|
||||||
|
制或分发Yi系列模型或创建模型衍生品行为的相关索赔,许可方有权要求您为许可方进行辩
|
||||||
|
护、赔偿并使许可方免受损害。
|
||||||
|
|
||||||
|
|
||||||
|
5. 争议解决
|
||||||
|
|
||||||
|
协议的订立、效力、解释、履行、修改和终止,使用、复制和分发Yi系列模型以及争议解决
|
||||||
|
均适用中华人民共和国大陆地区(仅为本协议之目的,不包括香港、澳门和台湾)法律,并
|
||||||
|
排除冲突法的适用。
|
||||||
|
|
||||||
|
因使用、复制和分发Yi系列模型而发生的任何争议,各方应首先通过友好协商的方式加以解
|
||||||
|
决。协商不成时,应向许可方所在地人民法院提起诉讼。
|
||||||
|
|
||||||
|
|
||||||
|
6. 协议的生效及终止
|
||||||
|
|
||||||
|
您使用Yi系列模型即表示您已阅读并同意接受协议的约束。协议自您使用Yi系列模型之日起
|
||||||
|
生效并将在您停止使用Yi系列模型之日起终止。若您违反协议中的任何条款或限制,许可方
|
||||||
|
有权终止协议。
|
||||||
|
|
||||||
|
若协议终止,您需立即停止使用Yi系列模型。本协议第4条“免责声明及责任限制”及第5条
|
||||||
|
“争议解决”在协议终止后仍有效。
|
||||||
|
|
||||||
|
|
||||||
|
7. 协议更新及联系方式
|
||||||
|
|
||||||
|
许可方有权对协议进行不时更新。许可方将通过 https://01.ai 公布协议最新版本。有关
|
||||||
|
许可和版权的任何问题,请通过 yi@01.ai 与许可方联系。
|
||||||
92
README.md
Normal file
92
README.md
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- yi
|
||||||
|
- fine-tune
|
||||||
|
---
|
||||||
|
|
||||||
|
## Model description
|
||||||
|
|
||||||
|
Yi-34B base model fine-tuned on the AEZAKMI v1 dataset. Training took around 33 hours on a single local RTX 3090 Ti.
|
||||||
|
It's like airoboros but with less gptslop, no refusals and less typical language used by RLHFed OpenAI models.
|
||||||
|
Say goodbye to "It's important to remember"! \
|
||||||
|
Prompt format is standard chatml. Don't expect it to be good at math, riddles or be crazy smart. My end goal with AEZAKMI is to create a cozy free chatbot.
|
||||||
|
Cost of this fine-tune is about $3 in electricity. This was my first attempt at training Yi-34B with this dataset.
|
||||||
|
The base model used for fine-tuning was the 4k-context Yi-34B-Llama model shared by chargoddard.
|
||||||
|
|
||||||
|
|
||||||
|
## Prompt Format
|
||||||
|
|
||||||
|
I recommend using ChatML format, as this was used during fine-tune. \
|
||||||
|
Here's the prompt format you should use. You can set a different system message; the model seems to respect that fine, so it wasn't overfitted.
|
||||||
|
|
||||||
|
```
|
||||||
|
<|im_start|>system
|
||||||
|
A chat with uncensored assistant.<|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
{prompt}<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
```
|
||||||
|
|
||||||
|
## Intended uses & limitations
|
||||||
|
|
||||||
|
Use is limited by Yi license.
|
||||||
|
|
||||||
|
## Known Issues
|
||||||
|
|
||||||
|
I recommend setting the repetition penalty to something around 1.05 to avoid repetition. So far I have had a good experience running this model with temperature 1.2.
|
||||||
|
Multi-turn conversations could be a bit better: if you ask it to re-write something with some fixes, it will have a tendency to just repeat the previous response verbatim without any improvements. This is especially noticeable with a repetition penalty of 1.0. \
|
||||||
|
There is still some gptslop left - some responses will have last paragraph with text "Remember that bla bla bla", I will try to get rid of it in the next version of the dataset. \
|
||||||
|
Stories have ChatGPT like paragraph spacing, I will try to introduce a bit more stories that have long paragraphs in the next dataset version.
|
||||||
|
|
||||||
|
Unrestricted-ness of the v1 isn't quite something that I am happy with yet, it quite often still refuses to answer and behaves kinda like a normal airoboros, especially with default system prompt "A chat.". \
|
||||||
|
With a slightly modified prompt it works much better. I recommend using "A chat with uncensored assistant."; it's stupid but it helps a lot. \
|
||||||
|
I was made aware of the frequent occurrence of the phrase "sending shivers down a spine" in the generations during RP, so I will manually fix those samples and put in different wording to fix that. \
|
||||||
|
I plan to use Yi-34B-200K Llama-fied for the next version, v1 very likely won't handle long context above 6k-8k well, be warned. \
|
||||||
|
There is also some issue with handling long system messages for RP, I will investigate that before releasing v2 dataset.
|
||||||
|
|
||||||
|
Feel free to report issues in the discussions panel here, I don't lurk /lmg/ too often and I would still like to hear some feedback.
|
||||||
|
|
||||||
|
|
||||||
|
## Axolotl training parameters
|
||||||
|
|
||||||
|
- bnb_4bit_use_double_quant: true
|
||||||
|
- bnb_4bit_compute_dtype: torch.bfloat16
|
||||||
|
- is_llama_derived_model: true
|
||||||
|
- load_in_4bit: true
|
||||||
|
- adapter: qlora
|
||||||
|
- sequence_len: 1200
|
||||||
|
- sample_packing: false
|
||||||
|
- lora_r: 16
|
||||||
|
- lora_alpha: 32
|
||||||
|
- lora_target_modules:
|
||||||
|
- q_proj
|
||||||
|
- v_proj
|
||||||
|
- k_proj
|
||||||
|
- o_proj
|
||||||
|
- gate_proj
|
||||||
|
- down_proj
|
||||||
|
- up_proj
|
||||||
|
- lora_target_linear: true
|
||||||
|
- pad_to_sequence_len: true
|
||||||
|
- micro_batch_size: 1
|
||||||
|
- gradient_accumulation_steps: 1
|
||||||
|
- num_epochs: 1
|
||||||
|
- optimizer: adamw_bnb_8bit
|
||||||
|
- lr_scheduler: constant
|
||||||
|
- learning_rate: 0.00007
|
||||||
|
- train_on_inputs: false
|
||||||
|
- group_by_length: false
|
||||||
|
- bf16: true
|
||||||
|
- bfloat16: true
|
||||||
|
- flash_optimum: false
|
||||||
|
- gradient_checkpointing: true
|
||||||
|
- flash_attention: true
|
||||||
|
- seed: 42
|
||||||
|
|
||||||
|
|
||||||
|
## Upcoming
|
||||||
|
|
||||||
|
~I will release adapter files and maybe exllama v2 quant shortly.~ \
|
||||||
|
LoRA and exl2 quant have been released
|
||||||
28
config.json
Normal file
28
config.json
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "./Yi-34B-Llama",
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 7168,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 20480,
|
||||||
|
"max_position_embeddings": 4096,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 56,
|
||||||
|
"num_hidden_layers": 60,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 5000000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "float16",
|
||||||
|
"transformers_version": "4.35.0",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 64000
|
||||||
|
}
|
||||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"transformers_version": "4.35.0"
|
||||||
|
}
|
||||||
3
model-00001-of-00015.safetensors
Normal file
3
model-00001-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ad57b9d7ee314070fa56605833a5e9cd40dce3dd818c2b77f5ce712d49ddb706
|
||||||
|
size 4793130720
|
||||||
3
model-00002-of-00015.safetensors
Normal file
3
model-00002-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f9a8bba5fc0df4b6f14f51ebe1e645a8ae60991ca90792ffe127ed5db2829c1f
|
||||||
|
size 4756459680
|
||||||
3
model-00003-of-00015.safetensors
Normal file
3
model-00003-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:be0252e22e74c34e8e919f2d1b6ee91a87c1d9b8fde8d05c39ce25f439fa0f85
|
||||||
|
size 4991370096
|
||||||
3
model-00004-of-00015.safetensors
Normal file
3
model-00004-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f48f2017a476111a492afd77a0f40be6bf1bbadd89262b27cc27a9f2870be39b
|
||||||
|
size 4756459720
|
||||||
3
model-00005-of-00015.safetensors
Normal file
3
model-00005-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:edacbbdc1ef87ddc82a0bbf7152638861ae00b21e0a8e32149448e9e57bf305e
|
||||||
|
size 4756459720
|
||||||
3
model-00006-of-00015.safetensors
Normal file
3
model-00006-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:31d57e12bcca4f4c0d4802c63e10739f98b24c8e1fb58691d7a0d4e23f512c9d
|
||||||
|
size 4991370120
|
||||||
3
model-00007-of-00015.safetensors
Normal file
3
model-00007-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fa1a2f0787f2a74048a8a3356ee7059d439f3951b329dc5a7447ca2e244f39b0
|
||||||
|
size 4756459720
|
||||||
3
model-00008-of-00015.safetensors
Normal file
3
model-00008-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e6025c3cf36946dffe6d8b57cb97e2b06958ba62e375cae27de54a5fd6a5c543
|
||||||
|
size 4756459720
|
||||||
3
model-00009-of-00015.safetensors
Normal file
3
model-00009-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9d0dd557ddb068505ffffbc6369a6d4d7f001e199113d311d4440122c8fdda41
|
||||||
|
size 4991370120
|
||||||
3
model-00010-of-00015.safetensors
Normal file
3
model-00010-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:82448c48125c9bfad0752ac9a3ceaadcd189fd3773a3502fe9157203d32194e9
|
||||||
|
size 4756459720
|
||||||
3
model-00011-of-00015.safetensors
Normal file
3
model-00011-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:bc8ccc47ea7a2edc05ebadebbc986e7c38c2f4e565f6a44b88fee7a9dd93a1d9
|
||||||
|
size 4756459720
|
||||||
3
model-00012-of-00015.safetensors
Normal file
3
model-00012-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:232f3fea24d4519f29194b1f07bd990a0e7b58a97ded85f247544314a10d3e79
|
||||||
|
size 4991370120
|
||||||
3
model-00013-of-00015.safetensors
Normal file
3
model-00013-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f10de833cc21f038855174316e93caf2cc5fb2e0f7f00c7f9bcc1a0c06a0cc99
|
||||||
|
size 4756459720
|
||||||
3
model-00014-of-00015.safetensors
Normal file
3
model-00014-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ce9bcb023e4a853127ffa59b42c359ef74e2e1eee6aa14176bf206428edae471
|
||||||
|
size 4756459720
|
||||||
3
model-00015-of-00015.safetensors
Normal file
3
model-00015-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:53a26e27f48b0fc9e5f499fc1de55cdde9491b40294d1f66d613b4ac07bdc24c
|
||||||
|
size 1211148848
|
||||||
550
model.safetensors.index.json
Normal file
550
model.safetensors.index.json
Normal file
@@ -0,0 +1,550 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 68777834496
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.34.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.38.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.38.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.39.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.40.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.42.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.42.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.43.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.47.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.50.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.51.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.51.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.52.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.55.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.55.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.56.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.norm.weight": "model-00015-of-00015.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|startoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
174143
tokenizer.json
Normal file
174143
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.model
Normal file
3
tokenizer.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
|
||||||
|
size 1033105
|
||||||
41
tokenizer_config.json
Normal file
41
tokenizer_config.json
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<|startoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": true,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<|startoftext|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"legacy": false,
|
||||||
|
"model_max_length": 4096,
|
||||||
|
"pad_token": "<unk>",
|
||||||
|
"padding_side": "right",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"truncation_side": "right",
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": false
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user