初始化项目,由ModelHub XC社区提供模型
Model: Ujjwal-Tyagi/EXAONE-4.0-32B Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.png filter=lfs diff=lfs merge=lfs -text
|
||||
157
LICENSE
Normal file
157
LICENSE
Normal file
@@ -0,0 +1,157 @@
|
||||
EXAONE AI Model License Agreement 1.2 - NC
|
||||
|
||||
This License Agreement (“Agreement”) is entered into between you (“Licensee”) and LG Management Development
|
||||
Institute Co., Ltd. (“Licensor”), governing the use of the EXAONE AI Model (“Model”). By downloading,
|
||||
installing, copying, or using the Model, you agree to comply with and be bound by the terms of this Agreement.
|
||||
If you do not agree to all the terms, you must not download, install, copy, or use the Model. This Agreement
|
||||
constitutes a binding legal agreement between the Licensee and Licensor.
|
||||
|
||||
1. Definitions
|
||||
1.1 Model: The artificial intelligence model provided by Licensor, which includes any software,
|
||||
algorithms, machine learning models, or related components supplied by Licensor. This definition extends
|
||||
to encompass all updates, enhancements, improvements, bug fixes, patches, or other modifications that may
|
||||
be provided by Licensor from time to time, whether automatically or manually implemented.
|
||||
1.2 Derivatives: Any modifications, alterations, enhancements, improvements, adaptations, or derivative
|
||||
works of the Model created by Licensee or any third party. This includes changes made to the Model's
|
||||
architecture, parameters, data processing methods, or any other aspect of the Model that results in a
|
||||
modification of its functionality or output.
|
||||
1.3 Output: Any data, results, content, predictions, analyses, insights, or other materials generated by
|
||||
the Model or Derivatives, regardless of whether they are in their original form or have been further
|
||||
processed or modified by the Licensee. This includes, but is not limited to, textual or numerical produced
|
||||
directly or indirectly through the use of the Model.
|
||||
1.4 Licensor: LG Management Development Institute Co., Ltd., the owner, developer, and provider of the
|
||||
EXAONE AI Model. The Licensor holds all rights, title, and interest in the Model and is responsible for
|
||||
granting licenses to use the Model under the terms specified in this Agreement.
|
||||
1.5 Licensee: The individual, organization, corporation, academic institution, government agency, or other
|
||||
entity using or intending to use the Model under the terms and conditions of this Agreement. The Licensee
|
||||
is responsible for ensuring compliance with the Agreement by all authorized users who access or utilize
|
||||
the Model on behalf of the Licensee.
|
||||
|
||||
2. License Grant
|
||||
2.1 Grant of License: Subject to the terms and conditions outlined in this Agreement, the Licensor hereby
|
||||
grants the Licensee a limited, non-exclusive, non-transferable, worldwide, and revocable license to:
|
||||
a. Access, download, install, and use the Model solely for research and educational purposes. This
|
||||
includes evaluation, testing, academic research, experimentation, learning, teaching, training and
|
||||
participation in competitions, provided that such participation is in a non-commercial context.
|
||||
Notwithstanding Section 3.1, the Licensee may only provide the Model or Derivatives for a competition
|
||||
if no commercial license is granted to the competition organizer or any third party.
|
||||
b. Publicly disclose research results and findings derived from the use of the Model or Derivatives,
|
||||
including publishing papers or presentations.
|
||||
c. Modify the Model and create Derivatives based on the Model, provided that such modifications and
|
||||
Derivatives are used exclusively for research and educational purposes. The Licensee may conduct
|
||||
experiments, perform analyses, and apply custom modifications to the Model to explore its capabilities
|
||||
and performance under various scenarios. If the Model is modified, the modified Model must include
|
||||
"EXAONE" at the beginning of its name.
|
||||
d. Distribute the Model and Derivatives in each case with a copy of this Agreement.
|
||||
2.2 Scope of License: The license granted herein does not authorize the Licensee to use the Model for any
|
||||
purpose not explicitly permitted under this Agreement. Any use beyond the scope of this license, including
|
||||
any commercial application or external distribution, is strictly prohibited unless explicitly agreed upon
|
||||
in writing by the Licensor.
|
||||
|
||||
3. Restrictions
|
||||
3.1 Commercial Use: The Licensee is expressly prohibited from using the Model, Derivatives, or Output for
|
||||
any commercial purposes, including but not limited to, developing or deploying products, services, or
|
||||
applications that generate revenue, whether directly or indirectly. Any commercial exploitation of the
|
||||
Model or its derivatives requires a separate commercial license agreement with the Licensor. Furthermore,
|
||||
the Licensee shall not use the Model, Derivatives or Output to develop or improve any models that compete
|
||||
with the Licensor’s models.
|
||||
3.2 Reverse Engineering: The Licensee shall not decompile, disassemble, reverse engineer, or attempt to
|
||||
derive the source code, underlying ideas, algorithms, or structure of the Model, except to the extent that
|
||||
such activities are expressly permitted by applicable law. Any attempt to bypass or circumvent
|
||||
technological protection measures applied to the Model is strictly prohibited.
|
||||
3.3 Unlawful Use: The Licensee shall not use the Model and Derivatives for any illegal, fraudulent, or
|
||||
unauthorized activities, nor for any purpose that violates applicable laws or regulations. This includes
|
||||
but is not limited to the creation, distribution, or dissemination of malicious, deceptive, or unlawful
|
||||
content.
|
||||
3.4 Ethical Use: The Licensee shall ensure that the Model or Derivatives is used in an ethical and
|
||||
responsible manner, adhering to the following guidelines:
|
||||
a. The Model and Derivatives shall not be used to generate, propagate, or amplify false, misleading,
|
||||
or harmful information, including fake news, misinformation, or disinformation.
|
||||
b. The Model and Derivatives shall not be employed to create, distribute, or promote content that is
|
||||
discriminatory, harassing, defamatory, abusive, or otherwise offensive to individuals or groups based
|
||||
on race, gender, sexual orientation, religion, nationality, or other protected characteristics.
|
||||
c. The Model and Derivatives shall not infringe on the rights of others, including intellectual property
|
||||
rights, privacy rights, or any other rights recognized by law. The Licensee shall obtain all necessary
|
||||
permissions and consents before using the Model and Derivatives in a manner that may impact the rights
|
||||
of third parties.
|
||||
d. The Model and Derivatives shall not be used in a way that causes harm, whether physical, mental,
|
||||
emotional, or financial, to individuals, organizations, or communities. The Licensee shall take all
|
||||
reasonable measures to prevent misuse or abuse of the Model and Derivatives that could result in harm
|
||||
or injury.
|
||||
|
||||
4. Ownership
|
||||
4.1 Intellectual Property: All rights, title, and interest in and to the Model, including any
|
||||
modifications, Derivatives, and associated documentation, are and shall remain the exclusive property of
|
||||
the Licensor. The Licensee acknowledges that this Agreement does not transfer any ownership rights to the
|
||||
Licensee. All trademarks, service marks, and logos associated with the Model are the property of the
|
||||
Licensor.
|
||||
4.2 Output: Licensor claims no rights in Output. Licensee is solely responsible for the Output and its use.
|
||||
4.3 Attribution: In any publication or presentation of results obtained using the Model, the Licensee
|
||||
shall provide appropriate attribution to the Licensor, citing the Model's name and version, along with any
|
||||
relevant documentation or references specified by the Licensor.
|
||||
|
||||
5. No Warranty
|
||||
5.1 “As-Is” Basis: The Model, Derivatives, and Output are provided on an “as-is” and “as-available” basis,
|
||||
without any warranties or representations of any kind, whether express, implied, or statutory. The Licensor
|
||||
disclaims all warranties, including but not limited to, implied warranties of merchantability, fitness for
|
||||
a particular purpose, accuracy, reliability, non-infringement, or any warranty arising from the course of
|
||||
dealing or usage of trade.
|
||||
5.2 Performance and Reliability: The Licensor does not warrant or guarantee that the Model, Derivatives or
|
||||
Output will meet the Licensee’s requirements, that the operation of the Model, Derivatives or Output will
|
||||
be uninterrupted or error-free, or that defects in the Model will be corrected. The Licensee acknowledges
|
||||
that the use of the Model, Derivatives or Output is at its own risk and that the Model, Derivatives or
|
||||
Output may contain bugs, errors, or other limitations.
|
||||
5.3 No Endorsement: The Licensor does not endorse, approve, or certify any results, conclusions, or
|
||||
recommendations derived from the use of the Model. The Licensee is solely responsible for evaluating the
|
||||
accuracy, reliability, and suitability of the Model for its intended purposes.
|
||||
|
||||
6. Limitation of Liability
|
||||
6.1 No Liability for Damages: To the fullest extent permitted by applicable law, in no event shall the
|
||||
Licensor be liable for any special, incidental, indirect, consequential, exemplary, or punitive damages,
|
||||
including but not limited to, damages for loss of business profits, business interruption, loss of business
|
||||
information, loss of data, or any other pecuniary or non-pecuniary loss arising out of or in connection with
|
||||
the use or inability to use the Model, Derivatives or any Output, even if the Licensor has been advised of
|
||||
the possibility of such damages.
|
||||
6.2 Indemnification: The Licensee agrees to indemnify, defend, and hold harmless the Licensor, its
|
||||
affiliates, officers, directors, employees, and agents from and against any claims, liabilities, damages,
|
||||
losses, costs, or expenses (including reasonable attorneys' fees) arising out of or related to the
|
||||
Licensee's use of the Model, any Derivatives, or any Output, including any violation of this Agreement or
|
||||
applicable laws.
|
||||
|
||||
7. Termination
|
||||
7.1 Termination by Licensor: The Licensor reserves the right to terminate this Agreement and revoke the
|
||||
Licensee’s rights to use the Model at any time, with or without cause, and without prior notice if the
|
||||
Licensee breaches any of the terms or conditions of this Agreement. Termination shall be effective
|
||||
immediately upon notice.
|
||||
7.2 Effect of Termination: Upon termination of this Agreement, the Licensee must immediately cease all use
|
||||
of the Model and Derivatives and destroy all copies of the Model and Derivatives in its possession or
|
||||
control, including any backup or archival copies. The Licensee shall certify in writing to the Licensor that
|
||||
such destruction has been completed.
|
||||
7.3 Survival: The provisions of this Agreement that by their nature should survive termination, including
|
||||
but not limited to, Sections 4 (Ownership), 5 (No Warranty), 6 (Limitation of Liability), and this Section 7
|
||||
(Termination), shall continue to apply after termination.
|
||||
|
||||
8. Governing Law
|
||||
8.1 Governing Law: This Agreement shall be governed by and construed in accordance with the laws of the
|
||||
Republic of Korea, without regard to its conflict of laws principles.
|
||||
8.2 Arbitration: Any disputes, controversies, or claims arising out of or relating to this Agreement,
|
||||
including its existence, validity, interpretation, performance, breach, or termination, shall be referred
|
||||
to and finally resolved by arbitration administered by the Korean Commercial Arbitration Board (KCAB) in
|
||||
accordance with the International Arbitration Rules of the Korean Commercial Arbitration Board in force at
|
||||
the time of the commencement of the arbitration. The seat of arbitration shall be Seoul, Republic of Korea.
|
||||
The tribunal shall consist of one arbitrator. The language of the arbitration shall be English.
|
||||
|
||||
9. Alterations
|
||||
9.1 Modifications: The Licensor reserves the right to modify or amend this Agreement at any time, in its
|
||||
sole discretion. Any modifications will be effective upon posting the updated Agreement on the Licensor’s
|
||||
website or through other means of communication. The Licensee is responsible for reviewing the Agreement
|
||||
periodically for changes. Continued use of the Model after any modifications have been made constitutes
|
||||
acceptance of the revised Agreement.
|
||||
9.2 Entire Agreement: This Agreement constitutes the entire agreement between the Licensee and Licensor
|
||||
concerning the subject matter hereof and supersedes all prior or contemporaneous oral or written agreements,
|
||||
representations, or understandings. Any terms or conditions of any purchase order or other document
|
||||
submitted by the Licensee in connection with the Model that are in addition to, different from, or
|
||||
inconsistent with the terms and conditions of this Agreement are not binding on the Licensor and are void.
|
||||
|
||||
By downloading, installing, or using the EXAONE AI Model, the Licensee acknowledges that it has read,
|
||||
understood, and agrees to be bound by the terms and conditions of this Agreement.
|
||||
3
assets/EXAONE_Symbol+BI_3d.png
Normal file
3
assets/EXAONE_Symbol+BI_3d.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c473c63768e9303c02a4f968fd2e7d41df3f669fedc6a7b51c4398cfcd7f23e4
|
||||
size 249084
|
||||
146
chat_template.jinja
Normal file
146
chat_template.jinja
Normal file
@@ -0,0 +1,146 @@
|
||||
{#- Template-wide defaults and constants.
    skip_think: when the caller does not supply it, it defaults to true, which makes
    the message loop emit an empty think block instead of stored reasoning.
    role_indicators: maps each supported message role to the header text written at
    the start of that role's turn; any role missing from this map is rejected by the
    message loop.
    end_of_turn: the marker written at the end of every turn. -#}
{%- if skip_think is not defined %}
    {%- set skip_think = true %}
{%- endif %}

{%- set role_indicators = {
    'user': '[|user|]\n',
    'assistant': '[|assistant|]\n',
    'system': '[|system|]\n',
    'tool': '[|tool|]\n'
} %}
{%- set end_of_turn = '[|endofturn|]\n' %}
|
||||
|
||||
|
||||
{#- available_tools(tools): renders the tool-usage section of the system turn.
    tools: an iterable of tool descriptions; each one is serialised with tojson and
    wrapped in <tool>...</tool> followed by a newline.
    Output order: a fixed heading and intro, one <tool> entry per tool, then fixed
    instructions describing the <tool_call> JSON format the model should answer with.
    The macro writes no turn markers; the caller adds the role header and end of turn. -#}
{%- macro available_tools(tools) %}
    {{- "# Available Tools" }}
    {{- "\nYou can use none, one, or multiple of the following tools by calling them as functions to help with the user’s query." }}
    {{- "\nHere are the tools available to you in JSON format within <tool> and </tool> tags:\n" }}
    {%- for tool in tools %}
        {{- "<tool>" }}
        {#- ensure_ascii=False is passed so non-ASCII text in tool descriptions is not \u-escaped
            (assumes the renderer's tojson filter accepts this argument - TODO confirm). -#}
        {{- tool | tojson(ensure_ascii=False) | safe }}
        {{- "</tool>\n" }}
    {%- endfor %}

    {{- "\nFor each function call you want to make, return a JSON object with function name and arguments within <tool_call> and </tool_call> tags, like:" }}
    {{- "\n<tool_call>{\"name\": function_1_name, \"arguments\": {argument_1_name: argument_1_value, argument_2_name: argument_2_value}}</tool_call>" }}
    {{- "\n<tool_call>{\"name\": function_2_name, \"arguments\": {...}}</tool_call>\n..." }}
    {{- "\nNote that if no argument name is specified for a tool, you can just print the argument value directly, without the argument name or JSON formatting." }}
{%- endmacro %}
|
||||
|
||||
|
||||
{#- Record the index of the last user message whose content is a plain string.
    A namespace object is used because a plain set inside a for loop does not
    persist after the loop ends. The value starts at the index of the final message
    and is overwritten by every matching user message, so the last match wins.
    NOTE(review): ns.last_query_index is not read anywhere else in the visible
    template - confirm whether it is still needed. -#}
{%- set ns = namespace(last_query_index = messages|length - 1) %}
{%- for message in messages %}
    {%- if message.role == "user" and message.content is string %}
        {%- set ns.last_query_index = loop.index0 -%}
    {%- endif %}
{%- endfor %}
|
||||
|
||||
{#- Main rendering loop: writes one turn per message, in order.
    Iterates by index so that neighbouring messages (i - 1, i + 1) can be inspected
    when grouping consecutive tool results into a single turn.
    Raises (via raise_exception, presumably supplied by the rendering environment)
    when a message has a role that is not a key of role_indicators, or when a tool
    call carries neither arguments nor parameters. -#}
{%- for i in range(messages | length) %}
    {%- set msg = messages[i] %}
    {%- set role = msg.role %}
    {%- if role not in role_indicators %}
        {{- raise_exception('Unknown role: ' ~ role) }}
    {%- endif %}

    {#- First message only: the tool list lives in the system turn. If the conversation
        starts with a system message the tools are appended to it and the message is
        finished here (continue needs the loop-controls extension). Otherwise, when
        tools exist, a system turn holding only the tool list is written before the
        first message is rendered normally below. -#}
    {%- if i == 0 %}
        {%- if role == 'system' %}
            {{- role_indicators['system'] }}
            {{- msg.content }}
            {%- if tools is defined and tools %}
                {{- "\n\n" }}{{- available_tools(tools) }}
            {%- endif %}
            {{- end_of_turn -}}
            {%- continue %}
        {%- elif tools is defined and tools %}
            {{- role_indicators['system'] }}
            {{- available_tools(tools) }}
            {{- end_of_turn -}}
        {%- endif %}
    {%- endif %}

    {%- if role == 'assistant' %}
        {{- role_indicators['assistant'] }}

        {%- if msg.content %}
            {#- Split inline reasoning from the visible answer: text before the first
                </think> is reasoning, text after the last </think> is the answer. -#}
            {%- if "</think>" in msg.content %}
                {%- set content = msg.content.split('</think>')[-1].strip() %}
                {%- set reasoning_content = msg.content.split('</think>')[0].strip() %}
                {%- if reasoning_content.startswith("<think>") %}
                    {#- The opening tag is 7 characters long, so slice at 7; the strip()
                        that follows removes the newline after the tag. Slicing further
                        would cut off the first character of the reasoning text. -#}
                    {%- set reasoning_content = reasoning_content[7:].strip() %}
                {%- endif %}
            {%- else %}
                {%- set content = msg.content %}
            {%- endif %}

            {#- An explicit reasoning_content field on the message overrides whatever
                was parsed out of the content string. -#}
            {%- if msg.reasoning_content %}
                {%- set reasoning_content = msg.reasoning_content %}
            {%- endif %}

            {#- Reasoning is replayed only for the final message and only when skip_think
                is false; every other assistant turn gets an empty think block. -#}
            {%- if (not skip_think and loop.last) and reasoning_content is defined %}
                {{- "<think>\n" }}
                {{- reasoning_content}}
                {{- "\n</think>\n\n" }}
            {%- else %}
                {{- "<think>\n\n</think>\n\n" }}
            {%- endif %}
            {{- content }}
        {%- endif %}

        {%- if msg.tool_calls %}
            {#- Tool calls follow the text content on a new line; when there is no text
                content the empty think block has not been written yet, so write it now. -#}
            {%- if msg.content %}
                {{- "\n" }}
            {%- else %}
                {{- "<think>\n\n</think>\n\n" }}
            {%- endif %}
            {%- for tool_call in msg.tool_calls %}
                {#- Accept both the wrapped form (fields under .function) and the flat form. -#}
                {%- if tool_call.function is defined %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}

                {#- The argument payload may be stored under either key; arguments wins. -#}
                {%- if tool_call.arguments is defined %}
                    {%- set arguments = tool_call.arguments %}
                {%- elif tool_call.parameters is defined %}
                    {%- set arguments = tool_call.parameters %}
                {%- else %}
                    {{- raise_exception('arguments or parameters are mandatory: ' ~ tool_call) }}
                {%- endif %}

                {{- "<tool_call>" }}{"name": "{{- tool_call.name }}", "arguments": {{ arguments | tojson(ensure_ascii=False) | safe }}}{{- "</tool_call>" }}

                {#- loop here is the inner tool_call loop: newline between calls, none after the last. -#}
                {%- if not loop.last %}
                    {{- "\n" }}
                {%- endif %}

            {%- endfor %}
        {%- endif %}
        {{- end_of_turn -}}

    {%- elif role == "tool" %}
        {#- Consecutive tool messages share one turn: the role header is written only for
            the first of a run, and the end-of-turn marker only after the last of a run. -#}
        {%- if i == 0 or messages[i - 1].role != "tool" %}
            {{- role_indicators['tool'] }}
        {%- endif %}
        {%- if msg.content is defined %}
            {{- "<tool_result>" }}{"result": {{ msg.content | tojson(ensure_ascii=False) | safe }}}{{- "</tool_result>" }}
        {%- endif %}
        {%- if loop.last or messages[i + 1].role != "tool" %}
            {{- end_of_turn -}}
        {%- else %}
            {{- "\n" }}
        {%- endif %}

    {%- else %}
        {#- Remaining roles in role_indicators (user, and system after position 0)
            are written verbatim. -#}
        {{- role_indicators[role] }}
        {{- msg.content }}
        {{- end_of_turn -}}
    {%- endif %}
{#- The closing tag strips preceding whitespace so that environments without
    trim_blocks do not emit a stray newline after every message. -#}
{%- endfor %}
|
||||
|
||||
|
||||
{#- Generation prompt: when add_generation_prompt is truthy, open an assistant turn
    for the model to complete. If enable_thinking is defined and exactly true, the
    think block is left open so the model writes its reasoning; in every other case
    an empty, already-closed think block is written so the model answers directly. -#}
{%- if add_generation_prompt %}
    {{- role_indicators['assistant'] }}
    {%- if enable_thinking is defined and enable_thinking is true %}
        {{- "<think>\n" }}
    {%- else %}
        {{- "<think>\n\n</think>\n\n" }}
    {%- endif %}
{%- endif %}
|
||||
101
config.json
Normal file
101
config.json
Normal file
@@ -0,0 +1,101 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Exaone4ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 361,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 27392,
|
||||
"layer_types": [
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 131072,
|
||||
"model_type": "exaone4",
|
||||
"num_attention_heads": 40,
|
||||
"num_hidden_layers": 64,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 0,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": {
|
||||
"factor": 16.0,
|
||||
"high_freq_factor": 4.0,
|
||||
"low_freq_factor": 1.0,
|
||||
"original_max_position_embeddings": 8192,
|
||||
"rope_type": "llama3"
|
||||
},
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": 4096,
|
||||
"sliding_window_pattern": "LLLG",
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.54.0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 102400
|
||||
}
|
||||
8
generation_config.json
Normal file
8
generation_config.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"cache_implementation": "hybrid",
|
||||
"eos_token_id": 361,
|
||||
"pad_token_id": 0,
|
||||
"transformers_version": "4.54.0"
|
||||
}
|
||||
101783
merges.txt
Normal file
101783
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00014.safetensors
Normal file
3
model-00001-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:566cae85b4776320a6e2148e1ff9b46dd16d81aebce90e31883cf45bd1c326dc
|
||||
size 4991311320
|
||||
3
model-00002-of-00014.safetensors
Normal file
3
model-00002-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1ca5d7ff40f12b94bce4bc7dc5f85dabd5b627f044fd60ec2d1c682cf9989b30
|
||||
size 4889097808
|
||||
3
model-00003-of-00014.safetensors
Normal file
3
model-00003-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:285343277f9734ce16d4d32305c8491015977334b96a5f0fe009048bdcfab1f9
|
||||
size 4836668200
|
||||
3
model-00004-of-00014.safetensors
Normal file
3
model-00004-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e68a031d65667d7af1d76779114ad42d567c27b80ddcf882acd0132b6dffd103
|
||||
size 4836668216
|
||||
3
model-00005-of-00014.safetensors
Normal file
3
model-00005-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c77f74197796949cf8363cee4e73ba8cb131471a1bb42ce79472190216faf359
|
||||
size 4836668216
|
||||
3
model-00006-of-00014.safetensors
Normal file
3
model-00006-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e5ec2f12eacea380e61a5be974c218871f1f10b5dc077352134d42bfb0fe0ae0
|
||||
size 4836668216
|
||||
3
model-00007-of-00014.safetensors
Normal file
3
model-00007-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a55005398c8436c17714fc45bac040eee9d99299b6a36397027eb393283b1ebf
|
||||
size 4836668216
|
||||
3
model-00008-of-00014.safetensors
Normal file
3
model-00008-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:48dc7e5d891e3d21e64681c03b5812345ca7175db06260cef6f1bc005cd4faf8
|
||||
size 4836668216
|
||||
3
model-00009-of-00014.safetensors
Normal file
3
model-00009-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e67bd8693c7b71af583b47ee4de0c7485b0eb63dd72fbd3983c0204cc1bc9eac
|
||||
size 4836668216
|
||||
3
model-00010-of-00014.safetensors
Normal file
3
model-00010-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:178611fa5a04149c454fc81930a9df87b604f4e6d1900511dbf242e37d1d4954
|
||||
size 4836668216
|
||||
3
model-00011-of-00014.safetensors
Normal file
3
model-00011-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c13356f6e6ed38444486a2e6cbfd29fccd2b2ea8ff61b7a753674a4f07144dbf
|
||||
size 4836668216
|
||||
3
model-00012-of-00014.safetensors
Normal file
3
model-00012-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a2b6d531f8c031b4fc739a3c466f8f7256fcf752712ad4c8635aecdbf725e3c5
|
||||
size 4836668216
|
||||
3
model-00013-of-00014.safetensors
Normal file
3
model-00013-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9b29ee8176189230d8446fba50e2698076b006bcfca5f91d50a8f36d9abf346f
|
||||
size 4710848216
|
||||
3
model-00014-of-00014.safetensors
Normal file
3
model-00014-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3c5f73f429056f9ee21eb9c355af6da2d79c46d310cd8cfc8c8cbc3fbfa46f1d
|
||||
size 1048576128
|
||||
715
model.safetensors.index.json
Normal file
715
model.safetensors.index.json
Normal file
@@ -0,0 +1,715 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 32003216384,
|
||||
"total_size": 64006432768
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00014-of-00014.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.post_feedforward_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.post_feedforward_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.post_feedforward_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.post_feedforward_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.post_feedforward_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.post_feedforward_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.post_feedforward_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.post_feedforward_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.post_feedforward_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.post_feedforward_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.post_feedforward_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.post_feedforward_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.post_feedforward_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.post_feedforward_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.post_feedforward_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.post_feedforward_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.post_feedforward_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.post_feedforward_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.post_feedforward_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.post_feedforward_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.post_feedforward_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.post_feedforward_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.post_feedforward_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.post_feedforward_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.post_feedforward_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.post_feedforward_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.post_feedforward_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.post_feedforward_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.post_feedforward_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.post_feedforward_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.post_feedforward_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.40.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.post_feedforward_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.post_feedforward_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.post_feedforward_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.post_feedforward_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.post_feedforward_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.45.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.post_feedforward_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.post_feedforward_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.post_feedforward_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.post_feedforward_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.post_feedforward_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.post_feedforward_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.50.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.post_feedforward_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.post_feedforward_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.post_feedforward_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.post_feedforward_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.post_feedforward_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.55.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.post_feedforward_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.post_feedforward_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.post_feedforward_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.post_feedforward_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.post_feedforward_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.post_feedforward_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.60.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.post_feedforward_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.post_feedforward_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.post_feedforward_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.post_feedforward_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.post_feedforward_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.post_feedforward_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.post_feedforward_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.norm.weight": "model-00013-of-00014.safetensors"
|
||||
}
|
||||
}
|
||||
336
special_tokens_map.json
Normal file
336
special_tokens_map.json
Normal file
@@ -0,0 +1,336 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"[unused0]",
|
||||
"[unused1]",
|
||||
"[unused2]",
|
||||
"[unused3]",
|
||||
"[unused4]",
|
||||
"[unused5]",
|
||||
"[unused6]",
|
||||
"[unused7]",
|
||||
"[unused8]",
|
||||
"[unused9]",
|
||||
"[unused10]",
|
||||
"[unused11]",
|
||||
"[unused12]",
|
||||
"[unused13]",
|
||||
"[unused14]",
|
||||
"[unused15]",
|
||||
"[unused16]",
|
||||
"[unused17]",
|
||||
"[unused18]",
|
||||
"[unused19]",
|
||||
"[unused20]",
|
||||
"[unused21]",
|
||||
"[unused22]",
|
||||
"[unused23]",
|
||||
"[unused24]",
|
||||
"[unused25]",
|
||||
"[unused26]",
|
||||
"[unused27]",
|
||||
"[unused28]",
|
||||
"[unused29]",
|
||||
"[unused30]",
|
||||
"[unused31]",
|
||||
"[unused32]",
|
||||
"[unused33]",
|
||||
"[unused34]",
|
||||
"[unused35]",
|
||||
"[unused36]",
|
||||
"[unused37]",
|
||||
"[unused38]",
|
||||
"[unused39]",
|
||||
"[unused40]",
|
||||
"[unused41]",
|
||||
"[unused42]",
|
||||
"[unused43]",
|
||||
"[unused44]",
|
||||
"[unused45]",
|
||||
"[unused46]",
|
||||
"[unused47]",
|
||||
"[unused48]",
|
||||
"[unused49]",
|
||||
"[unused50]",
|
||||
"[unused51]",
|
||||
"[unused52]",
|
||||
"[unused53]",
|
||||
"[unused54]",
|
||||
"[unused55]",
|
||||
"[unused56]",
|
||||
"[unused57]",
|
||||
"[unused58]",
|
||||
"[unused59]",
|
||||
"[unused60]",
|
||||
"[unused61]",
|
||||
"[unused62]",
|
||||
"[unused63]",
|
||||
"[unused64]",
|
||||
"[unused65]",
|
||||
"[unused66]",
|
||||
"[unused67]",
|
||||
"[unused68]",
|
||||
"[unused69]",
|
||||
"[unused70]",
|
||||
"[unused71]",
|
||||
"[unused72]",
|
||||
"[unused73]",
|
||||
"[unused74]",
|
||||
"[unused75]",
|
||||
"[unused76]",
|
||||
"[unused77]",
|
||||
"[unused78]",
|
||||
"[unused79]",
|
||||
"[unused80]",
|
||||
"[unused81]",
|
||||
"[unused82]",
|
||||
"[unused83]",
|
||||
"[unused84]",
|
||||
"[unused85]",
|
||||
"[unused86]",
|
||||
"[unused87]",
|
||||
"[unused88]",
|
||||
"[unused89]",
|
||||
"[unused90]",
|
||||
"[unused91]",
|
||||
"[unused92]",
|
||||
"[unused93]",
|
||||
"[unused94]",
|
||||
"[unused95]",
|
||||
"[unused96]",
|
||||
"[unused97]",
|
||||
"[unused98]",
|
||||
"[unused99]",
|
||||
"[extra_id_0]",
|
||||
"[extra_id_1]",
|
||||
"[extra_id_2]",
|
||||
"[extra_id_3]",
|
||||
"[extra_id_4]",
|
||||
"[extra_id_5]",
|
||||
"[extra_id_6]",
|
||||
"[extra_id_7]",
|
||||
"[extra_id_8]",
|
||||
"[extra_id_9]",
|
||||
"[extra_id_10]",
|
||||
"[extra_id_11]",
|
||||
"[extra_id_12]",
|
||||
"[extra_id_13]",
|
||||
"[extra_id_14]",
|
||||
"[extra_id_15]",
|
||||
"[extra_id_16]",
|
||||
"[extra_id_17]",
|
||||
"[extra_id_18]",
|
||||
"[extra_id_19]",
|
||||
"[extra_id_20]",
|
||||
"[extra_id_21]",
|
||||
"[extra_id_22]",
|
||||
"[extra_id_23]",
|
||||
"[extra_id_24]",
|
||||
"[extra_id_25]",
|
||||
"[extra_id_26]",
|
||||
"[extra_id_27]",
|
||||
"[extra_id_28]",
|
||||
"[extra_id_29]",
|
||||
"[extra_id_30]",
|
||||
"[extra_id_31]",
|
||||
"[extra_id_32]",
|
||||
"[extra_id_33]",
|
||||
"[extra_id_34]",
|
||||
"[extra_id_35]",
|
||||
"[extra_id_36]",
|
||||
"[extra_id_37]",
|
||||
"[extra_id_38]",
|
||||
"[extra_id_39]",
|
||||
"[extra_id_40]",
|
||||
"[extra_id_41]",
|
||||
"[extra_id_42]",
|
||||
"[extra_id_43]",
|
||||
"[extra_id_44]",
|
||||
"[extra_id_45]",
|
||||
"[extra_id_46]",
|
||||
"[extra_id_47]",
|
||||
"[extra_id_48]",
|
||||
"[extra_id_49]",
|
||||
"[extra_id_50]",
|
||||
"[extra_id_51]",
|
||||
"[extra_id_52]",
|
||||
"[extra_id_53]",
|
||||
"[extra_id_54]",
|
||||
"[extra_id_55]",
|
||||
"[extra_id_56]",
|
||||
"[extra_id_57]",
|
||||
"[extra_id_58]",
|
||||
"[extra_id_59]",
|
||||
"[extra_id_60]",
|
||||
"[extra_id_61]",
|
||||
"[extra_id_62]",
|
||||
"[extra_id_63]",
|
||||
"[extra_id_64]",
|
||||
"[extra_id_65]",
|
||||
"[extra_id_66]",
|
||||
"[extra_id_67]",
|
||||
"[extra_id_68]",
|
||||
"[extra_id_69]",
|
||||
"[extra_id_70]",
|
||||
"[extra_id_71]",
|
||||
"[extra_id_72]",
|
||||
"[extra_id_73]",
|
||||
"[extra_id_74]",
|
||||
"[extra_id_75]",
|
||||
"[extra_id_76]",
|
||||
"[extra_id_77]",
|
||||
"[extra_id_78]",
|
||||
"[extra_id_79]",
|
||||
"[extra_id_80]",
|
||||
"[extra_id_81]",
|
||||
"[extra_id_82]",
|
||||
"[extra_id_83]",
|
||||
"[extra_id_84]",
|
||||
"[extra_id_85]",
|
||||
"[extra_id_86]",
|
||||
"[extra_id_87]",
|
||||
"[extra_id_88]",
|
||||
"[extra_id_89]",
|
||||
"[extra_id_90]",
|
||||
"[extra_id_91]",
|
||||
"[extra_id_92]",
|
||||
"[extra_id_93]",
|
||||
"[extra_id_94]",
|
||||
"[extra_id_95]",
|
||||
"[extra_id_96]",
|
||||
"[extra_id_97]",
|
||||
"[extra_id_98]",
|
||||
"[extra_id_99]",
|
||||
"[extra_id_100]",
|
||||
"[extra_id_101]",
|
||||
"[extra_id_102]",
|
||||
"[extra_id_103]",
|
||||
"[extra_id_104]",
|
||||
"[extra_id_105]",
|
||||
"[extra_id_106]",
|
||||
"[extra_id_107]",
|
||||
"[extra_id_108]",
|
||||
"[extra_id_109]",
|
||||
"[extra_id_110]",
|
||||
"[extra_id_111]",
|
||||
"[extra_id_112]",
|
||||
"[extra_id_113]",
|
||||
"[extra_id_114]",
|
||||
"[extra_id_115]",
|
||||
"[extra_id_116]",
|
||||
"[extra_id_117]",
|
||||
"[extra_id_118]",
|
||||
"[extra_id_119]",
|
||||
"[extra_id_120]",
|
||||
"[extra_id_121]",
|
||||
"[extra_id_122]",
|
||||
"[extra_id_123]",
|
||||
"[extra_id_124]",
|
||||
"[extra_id_125]",
|
||||
"[extra_id_126]",
|
||||
"[extra_id_127]",
|
||||
"[extra_id_128]",
|
||||
"[extra_id_129]",
|
||||
"[extra_id_130]",
|
||||
"[extra_id_131]",
|
||||
"[extra_id_132]",
|
||||
"[extra_id_133]",
|
||||
"[extra_id_134]",
|
||||
"[extra_id_135]",
|
||||
"[extra_id_136]",
|
||||
"[extra_id_137]",
|
||||
"[extra_id_138]",
|
||||
"[extra_id_139]",
|
||||
"[extra_id_140]",
|
||||
"[extra_id_141]",
|
||||
"[extra_id_142]",
|
||||
"[extra_id_143]",
|
||||
"[extra_id_144]",
|
||||
"[extra_id_145]",
|
||||
"[extra_id_146]",
|
||||
"[extra_id_147]",
|
||||
"[extra_id_148]",
|
||||
"[extra_id_149]",
|
||||
"[extra_id_150]",
|
||||
"[extra_id_151]",
|
||||
"[extra_id_152]",
|
||||
"[extra_id_153]",
|
||||
"[extra_id_154]",
|
||||
"[extra_id_155]",
|
||||
"[extra_id_156]",
|
||||
"[extra_id_157]",
|
||||
"[extra_id_158]",
|
||||
"[extra_id_159]",
|
||||
"[extra_id_160]",
|
||||
"[extra_id_161]",
|
||||
"[extra_id_162]",
|
||||
"[extra_id_163]",
|
||||
"[extra_id_164]",
|
||||
"[extra_id_165]",
|
||||
"[extra_id_166]",
|
||||
"[extra_id_167]",
|
||||
"[extra_id_168]",
|
||||
"[extra_id_169]",
|
||||
"[extra_id_170]",
|
||||
"[extra_id_171]",
|
||||
"[extra_id_172]",
|
||||
"[extra_id_173]",
|
||||
"[extra_id_174]",
|
||||
"[extra_id_175]",
|
||||
"[extra_id_176]",
|
||||
"[extra_id_177]",
|
||||
"[extra_id_178]",
|
||||
"[extra_id_179]",
|
||||
"[extra_id_180]",
|
||||
"[extra_id_181]",
|
||||
"[extra_id_182]",
|
||||
"[extra_id_183]",
|
||||
"[extra_id_184]",
|
||||
"[extra_id_185]",
|
||||
"[extra_id_186]",
|
||||
"[extra_id_187]",
|
||||
"[extra_id_188]",
|
||||
"[|system|]",
|
||||
"[|tool|]",
|
||||
"[|assistant|]",
|
||||
"[|user|]",
|
||||
"[|endofturn|]",
|
||||
"PI:URL",
|
||||
"PI:EMAIL",
|
||||
"PI:ACCOUNT_NUM",
|
||||
"PI:PHONE_NUM",
|
||||
"PI:BUSINESS_NUM",
|
||||
"PI:ANNON",
|
||||
"PI:KEY",
|
||||
"PI:ID",
|
||||
"PI:IP_ADDRESS",
|
||||
"PI:USER"
|
||||
],
|
||||
"bos_token": {
|
||||
"content": "[BOS]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "[|endofturn|]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "[PAD]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "[UNK]",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
512826
tokenizer.json
Normal file
512826
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
3219
tokenizer_config.json
Normal file
3219
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user