初始化项目,由ModelHub XC社区提供模型

Model: Delta-Vector/Rei-24B-KTO
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-19 11:57:51 +08:00
commit 8c0642cb2c
19 changed files with 10901 additions and 0 deletions

36
.gitattributes vendored Normal file
View File

@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

326
README.md Normal file
View File

@@ -0,0 +1,326 @@
---
library_name: transformers
tags:
- fine-tuning
- prose
- KTO
- axolotl
- finetune
- roleplaying
- creative-writing
base_model:
- Delta-Vector/Rei-24B-Base
---
<style>
@import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&display=swap');
body {
font-family: 'Share Tech Mono', monospace;
background: #121212;
color: #E0E0E0;
margin: 0;
padding: 0;
font-size: 16px;
}
.container {
margin: 20px auto;
background-color: #1E1E1E;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 25px rgba(255, 0, 51, 0.3);
border: 1px solid #FF0033;
position: relative;
max-width: 1000px;
}
.header h1 {
font-size: 32px;
color: #FF0033;
margin: 0 0 20px 0;
text-align: center;
text-shadow: 0 0 10px rgba(255, 0, 51, 0.7);
text-transform: uppercase;
}
.section {
margin-top: 30px;
}
.section h2 {
font-size: 26px;
color: #FF0033;
text-align: center;
border-bottom: 1px solid #FF0033;
padding-bottom: 10px;
margin-bottom: 20px;
}
.info p {
color: #E0E0E0;
line-height: 1.7;
font-size: 16px;
}
.info img {
width: 100%;
max-width: 600px;
border-radius: 4px;
margin: 0 auto 15px;
display: block;
box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
border: 1px solid #444;
}
a {
color: #FF0033;
text-decoration: none;
transition: color 0.2s ease, text-shadow 0.2s ease;
}
a:hover {
color: #FFFFFF;
text-shadow: 0 0 8px rgba(255, 0, 51, 0.8);
}
.button {
display: inline-block;
background-color: transparent;
color: #FF0033;
padding: 10px 20px;
border-radius: 4px;
cursor: pointer;
text-decoration: none;
border: 1px solid #FF0033;
transition: background-color 0.2s ease, box-shadow 0.2s ease, color 0.2s ease;
}
.button:hover {
background-color: #FF0033;
color: #121212;
box-shadow: 0 0 15px rgba(255, 0, 51, 0.7);
}
pre {
background-color: #0d0d0d;
padding: 15px;
border-radius: 4px;
overflow-x: auto;
border: 1px solid #444;
border-left: 3px solid #FF0033;
color: #E0E0E0;
}
code {
font-family: 'Share Tech Mono', monospace;
color: #E0E0E0;
}
.info-card {
background: #2a2a2a;
border: 1px solid #444;
border-radius: 8px;
overflow: hidden;
}
.info-header {
background: #1a1a1a;
padding: 20px;
border-bottom: 1px solid #444;
}
.info-header h3 {
color: #FFFFFF;
margin: 0 0 10px 0;
font-size: 22px;
text-align: center;
}
.model-tags {
display: flex;
justify-content: center;
gap: 10px;
flex-wrap: wrap;
}
.model-tag {
background: #121212;
color: #FF0033;
padding: 5px 10px;
border-radius: 4px;
font-size: 12px;
border: 1px solid #FF0033;
text-transform: uppercase;
}
.model-composition {
padding: 20px;
border-bottom: 1px solid #444;
}
.model-composition h4 {
color: #FFFFFF;
margin: 0 0 15px 0;
font-size: 18px;
text-align: center;
}
.composition-list {
list-style: none;
padding: 0;
margin: 0;
display: grid;
gap: 10px;
}
.composition-list li {
color: #E0E0E0;
display: flex;
align-items: baseline;
gap: 8px;
}
.model-component {
font-weight: bold;
min-width: 120px;
}
.model-description {
padding: 20px;
background: #2a2a2a;
}
.metrics-section {
margin-bottom: 30px;
}
.metrics-section details {
background: #2a2a2a;
border: 1px solid #444;
border-radius: 8px;
padding: 15px;
margin-bottom: 15px;
}
.metrics-section summary {
color: #FF0033;
font-size: 18px;
cursor: pointer;
outline: none;
padding: 5px 0;
text-align: center;
font-weight: bold;
}
.creator-section {
margin: 20px 0;
}
.creator-badge {
display: inline-flex;
align-items: center;
background: #1a1a1a;
border: 1px solid #444;
border-radius: 4px;
padding: 10px 15px;
}
.creator-label {
color: #E0E0E0;
font-size: 14px;
margin-right: 8px;
}
.creator-link {
display: flex;
align-items: center;
gap: 5px;
color: #FF0033;
text-decoration: none;
transition: all 0.2s ease;
}
.creator-name {
font-weight: 600;
}
.creator-arrow {
font-size: 16px;
transition: transform 0.2s ease;
}
.creator-link:hover .creator-arrow {
transform: translateX(5px);
}
.link-arrow {
display: inline-block;
transition: transform 0.2s ease;
}
a:hover .link-arrow {
transform: translateX(5px);
}
.axolotl-container {
text-align: center;
margin: 30px 0;
filter: invert(1) hue-rotate(180deg);
}
.axolotl-container img {
max-width: 300px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
border: 1px solid #000000;
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Rei-KTO-24B</title>
</head>
<body>
<div class="container">
<div class="header">
<h1>Rei-KTO-24B</h1>
</div>
<div class="info">
<img src="https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/EFNIBpVv1OITZl_su5DyR.png" alt="Model banner">
<div style="text-align: center;">
<div class="creator-section">
<div class="creator-badge">
<span class="creator-label">Created by</span>
<a href="https://huggingface.co/Delta-Vector" target="_blank" class="creator-link">
<span class="creator-name">Delta-Vector</span>
<span class="creator-arrow"></span>
</a>
</div>
</div>
<div class="model-info">
<h2>Model Information</h2>
<div class="info-card">
<div class="info-header">
<h3>Rei-KTO-24B</h3>
<div class="model-tags">
<span class="model-tag">KTO enhanced</span>
<span class="model-tag">Painted Fantasy Finetune</span>
<span class="model-tag">Creative Prose</span>
</div>
</div>
<div class="model-description">
<p>A model meant to replicate the style and prose of the Anthropic Claude models, Opus and Sonnet. This model is meant for roleplaying and creative writing; it has some nice smarts without being too sloppy, etc. - it's pretty good. It was trained in 2 steps: first, SFT on Zerofata's PaintedFantasy, which I found great at anime-otaku-esque characters, and then KTO to improve coherency and instruction following.</p>
</div>
</div>
</div>
<div class="section">
<h2>Quantized Versions</h2>
<div class="info-card">
<div class="model-composition">
<h4>Available Downloads</h4>
<ul class="composition-list">
<li><span class="model-component"><a href="https://huggingface.co/mradermacher/Rei-24B-KTO-GGUF" target="_blank">GGUF Format</a></span>For use with llama.cpp &amp; forks (Ty Mradermacher &lt;3)</li>
<li><span class="model-component"><a href="https://huggingface.co/DeathGodlike/Rei-24B-KTO_EXL3" target="_blank">EXL3 Format</a></span>For use with TabbyAPI</li>
</ul>
</div>
</div>
</div>
<div class="section">
<h2>Prompting</h2>
<p>The model is tuned with V7 Tekken formatting. A typical input would look like this:</p>
<pre><code><span style="color:#FF0033;">&lt;s&gt;[SYSTEM_PROMPT]system_prompt[/SYSTEM_PROMPT][INST]Hi there![/INST]Nice to meet you!&lt;/s&gt;[INST]Can I ask a question?[/INST]</span>
</code></pre>
</div>
<div class="section">
<h2>Training</h2>
<p>Training was done in 2 steps: SFT, then KTO.</p>
<div class="metrics-section">
<details>
<summary>Access Configs</summary>
<pre><code>SFT: https://wandb.ai/new-eden/Painted-Fantasy-Rei/artifacts/axolotl-config/config-u7to9d5q/v0/files/axolotl_config_f0p7vnaf.yml
KTO: https://wandb.ai/new-eden/Painted-Rei/artifacts/axolotl-config/config-8n37w77c/v0/files/axolotl_config_hvrd2tzn.yml
</code></pre>
</details>
</div>
</div>
<div class="section">
<h2>Training</h2>
<p>The training was done for 2 epochs using 8 x A100s for 24 hours.</p>
<p style="text-align: center; margin-top: 20px;">
<div class="axolotl-container">
<a href="https://github.com/OpenAccess-AI-Collective/axolotl" target="_blank">
<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl">
</a>
</div>
</div>
<div class="section">
<h2>Credits</h2>
<p>Thank you to <a href="https://huggingface.co/lucyknada">Lucy Knada</a>, <a href="https://huggingface.co/Ateron">Ateron</a>, <a href="https://huggingface.co/AliCat2">Alicat</a>, <a href="https://huggingface.co/intervitens">Intervitens</a>, <a href="https://huggingface.co/cgato">Cgato</a>, <a href="https://huggingface.co/kubernetes-bad">Kubernetes Bad</a> and the rest of <a href="https://huggingface.co/anthracite-org">Anthracite</a>.</p>
</div>
</div>
</div>
</body>
</html>

51
chat_template.jinja Normal file
View File

@@ -0,0 +1,51 @@
{#- Chat template: renders a list of chat messages into one prompt string built from the bos/eos tokens and the [SYSTEM_PROMPT], [INST] and [IMG] markers. #}
{#- Inputs read below: messages, bos_token, eos_token; strftime_now and raise_exception are presumably supplied by the rendering environment (confirm against the loader). #}
{#- Current date formatted as YYYY-MM-DD; it is interpolated into the default system message. #}
{%- set today = strftime_now("%Y-%m-%d") %}
{#- Fallback system prompt, used only when the first message does not have the system role. #}
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
{#- The prompt always starts with the beginning-of-sequence token. #}
{{- bos_token }}
{#- A leading system message supplies the system prompt and is removed from the loop; messages[0] is indexed unconditionally, so the list is assumed to be non-empty. #}
{%- if messages[0]['role'] == 'system' %}
{#- Content may be a plain string or a list of blocks; for a list only the first block's text is used. #}
{%- if messages[0]['content'] is string %}
{%- set system_message = messages[0]['content'] %}
{%- else %}
{%- set system_message = messages[0]['content'][0]['text'] %}
{%- endif %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = default_system_message %}
{%- set loop_messages = messages %}
{%- endif %}
{#- The system prompt is always emitted, whether caller-provided or the default. #}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
{%- for message in loop_messages %}
{#- User turns are wrapped in [INST] ... [/INST]. #}
{%- if message['role'] == 'user' %}
{%- if message['content'] is string %}
{{- '[INST]' + message['content'] + '[/INST]' }}
{%- else %}
{{- '[INST]' }}
{#- Block-list content: text blocks are emitted verbatim, image and image_url blocks become an [IMG] placeholder, any other block type raises. #}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{{- block['text'] }}
{%- elif block['type'] in ['image', 'image_url'] %}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Only text and image blocks are supported in message content!') }}
{%- endif %}
{%- endfor %}
{{- '[/INST]' }}
{%- endif %}
{#- System messages after the first position are emitted in place with the same markers. #}
{%- elif message['role'] == 'system' %}
{%- if message['content'] is string %}
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
{%- else %}
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
{%- endif %}
{#- Assistant turns are emitted without a wrapper and terminated with the end-of-sequence token; for block-list content only the first block's text is used. #}
{%- elif message['role'] == 'assistant' %}
{%- if message['content'] is string %}
{{- message['content'] + eos_token }}
{%- else %}
{{- message['content'][0]['text'] + eos_token }}
{%- endif %}
{#- Any other role is rejected. #}
{%- else %}
{{- raise_exception('Only user, system and assistant roles are supported!') }}
{%- endif %}
{%- endfor %}

26
config.json Normal file
View File

@@ -0,0 +1,26 @@
{
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 5120,
"initializer_range": 0.02,
"intermediate_size": 32768,
"max_position_embeddings": 131072,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 40,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.53.1",
"use_cache": true,
"vocab_size": 131072
}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"do_sample": true,
"eos_token_id": 2,
"transformers_version": "4.53.1"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b9b4cc3c9c276caa374f2eb7c82a838ccc5f1cc3d426267ad4adefd934a97b4d
size 4781571736

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cab8dcb3bbede0455b28dfb80692414db91fb2ae6e79342b98ca7fbecae79d3e
size 4781592784

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:db1cf23e5c2de4c57abb13800c847facb7ea914733ccf8c5e3fc7660a0837b22
size 4781592800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b35b71354f8f860ed53fd1c578590e99eb4b3954920ce5067a781f9087168295
size 4886471600

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fcf983ad514ffeef4d0d3559358882fd54e97017002aaffe6bb2f4349fbbf2a0
size 4781592824

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5be6481070f5e0a58908791c9a15977bb573025f250ac0145874250d4768a6b1
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fe61b97b759fe04cf0aafa22ee1bde2612da688147b16328f72a36d3b3c9cac8
size 4886471600

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5694220fb02c1d8a118d474836d902c3aac10628a9663e2bf6721dc59bfdf039
size 4781592824

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:12d552caf65da1ef090b8bbfdbeb8e4218771f5d7f517d9951b64db6798f79a2
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d93d37682ef34f2c58fe073d5ada793e7c54526574f61f64fd194a6f36a3f802
size 3900777072

View File

@@ -0,0 +1,371 @@
{
"metadata": {
"total_parameters": 23572403200,
"total_size": 47144806400
},
"weight_map": {
"lm_head.weight": "model-00010-of-00010.safetensors",
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.10.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.17.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.20.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.21.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.25.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.29.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.30.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.33.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.7.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.norm.weight": "model-00010-of-00010.safetensors"
}
}

1032
special_tokens_map.json Normal file

File diff suppressed because it is too large Load Diff

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b76085f9923309d873994d444989f7eb6ec074b06f25b58f1e8d7b7741070949
size 17078037

9019
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff