初始化项目,由ModelHub XC社区提供模型
Model: Delta-Vector/Rei-24B-KTO Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
326
README.md
Normal file
326
README.md
Normal file
@@ -0,0 +1,326 @@
|
||||
---
|
||||
library_name: transformers
|
||||
tags:
|
||||
- fine-tuning
|
||||
- prose
|
||||
- KTO
|
||||
- axolotl
|
||||
- finetune
|
||||
- roleplaying
|
||||
- creative-writing
|
||||
base_model:
|
||||
- Delta-Vector/Rei-24B-Base
|
||||
---
|
||||
<style>
|
||||
@import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&display=swap');
|
||||
|
||||
body {
|
||||
font-family: 'Share Tech Mono', monospace;
|
||||
background: #121212;
|
||||
color: #E0E0E0;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-size: 16px;
|
||||
}
|
||||
.container {
|
||||
margin: 20px auto;
|
||||
background-color: #1E1E1E;
|
||||
padding: 20px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 0 25px rgba(255, 0, 51, 0.3);
|
||||
border: 1px solid #FF0033;
|
||||
position: relative;
|
||||
max-width: 1000px;
|
||||
}
|
||||
.header h1 {
|
||||
font-size: 32px;
|
||||
color: #FF0033;
|
||||
margin: 0 0 20px 0;
|
||||
text-align: center;
|
||||
text-shadow: 0 0 10px rgba(255, 0, 51, 0.7);
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.section {
|
||||
margin-top: 30px;
|
||||
}
|
||||
.section h2 {
|
||||
font-size: 26px;
|
||||
color: #FF0033;
|
||||
text-align: center;
|
||||
border-bottom: 1px solid #FF0033;
|
||||
padding-bottom: 10px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.info p {
|
||||
color: #E0E0E0;
|
||||
line-height: 1.7;
|
||||
font-size: 16px;
|
||||
}
|
||||
.info img {
|
||||
width: 100%;
|
||||
max-width: 600px;
|
||||
border-radius: 4px;
|
||||
margin: 0 auto 15px;
|
||||
display: block;
|
||||
box-shadow: 0 0 20px rgba(0, 0, 0, 0.5);
|
||||
border: 1px solid #444;
|
||||
}
|
||||
a {
|
||||
color: #FF0033;
|
||||
text-decoration: none;
|
||||
transition: color 0.2s ease, text-shadow 0.2s ease;
|
||||
}
|
||||
a:hover {
|
||||
color: #FFFFFF;
|
||||
text-shadow: 0 0 8px rgba(255, 0, 51, 0.8);
|
||||
}
|
||||
.button {
|
||||
display: inline-block;
|
||||
background-color: transparent;
|
||||
color: #FF0033;
|
||||
padding: 10px 20px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
text-decoration: none;
|
||||
border: 1px solid #FF0033;
|
||||
transition: background-color 0.2s ease, box-shadow 0.2s ease, color 0.2s ease;
|
||||
}
|
||||
.button:hover {
|
||||
background-color: #FF0033;
|
||||
color: #121212;
|
||||
box-shadow: 0 0 15px rgba(255, 0, 51, 0.7);
|
||||
}
|
||||
pre {
|
||||
background-color: #0d0d0d;
|
||||
padding: 15px;
|
||||
border-radius: 4px;
|
||||
overflow-x: auto;
|
||||
border: 1px solid #444;
|
||||
border-left: 3px solid #FF0033;
|
||||
color: #E0E0E0;
|
||||
}
|
||||
code {
|
||||
font-family: 'Share Tech Mono', monospace;
|
||||
color: #E0E0E0;
|
||||
}
|
||||
.info-card {
|
||||
background: #2a2a2a;
|
||||
border: 1px solid #444;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.info-header {
|
||||
background: #1a1a1a;
|
||||
padding: 20px;
|
||||
border-bottom: 1px solid #444;
|
||||
}
|
||||
.info-header h3 {
|
||||
color: #FFFFFF;
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 22px;
|
||||
text-align: center;
|
||||
}
|
||||
.model-tags {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.model-tag {
|
||||
background: #121212;
|
||||
color: #FF0033;
|
||||
padding: 5px 10px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
border: 1px solid #FF0033;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.model-composition {
|
||||
padding: 20px;
|
||||
border-bottom: 1px solid #444;
|
||||
}
|
||||
.model-composition h4 {
|
||||
color: #FFFFFF;
|
||||
margin: 0 0 15px 0;
|
||||
font-size: 18px;
|
||||
text-align: center;
|
||||
}
|
||||
.composition-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
}
|
||||
.composition-list li {
|
||||
color: #E0E0E0;
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 8px;
|
||||
}
|
||||
.model-component {
|
||||
font-weight: bold;
|
||||
min-width: 120px;
|
||||
}
|
||||
.model-description {
|
||||
padding: 20px;
|
||||
background: #2a2a2a;
|
||||
}
|
||||
.metrics-section {
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.metrics-section details {
|
||||
background: #2a2a2a;
|
||||
border: 1px solid #444;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.metrics-section summary {
|
||||
color: #FF0033;
|
||||
font-size: 18px;
|
||||
cursor: pointer;
|
||||
outline: none;
|
||||
padding: 5px 0;
|
||||
text-align: center;
|
||||
font-weight: bold;
|
||||
}
|
||||
.creator-section {
|
||||
margin: 20px 0;
|
||||
}
|
||||
.creator-badge {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
background: #1a1a1a;
|
||||
border: 1px solid #444;
|
||||
border-radius: 4px;
|
||||
padding: 10px 15px;
|
||||
}
|
||||
.creator-label {
|
||||
color: #E0E0E0;
|
||||
font-size: 14px;
|
||||
margin-right: 8px;
|
||||
}
|
||||
.creator-link {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
color: #FF0033;
|
||||
text-decoration: none;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
.creator-name {
|
||||
font-weight: 600;
|
||||
}
|
||||
.creator-arrow {
|
||||
font-size: 16px;
|
||||
transition: transform 0.2s ease;
|
||||
}
|
||||
.creator-link:hover .creator-arrow {
|
||||
transform: translateX(5px);
|
||||
}
|
||||
.link-arrow {
|
||||
display: inline-block;
|
||||
transition: transform 0.2s ease;
|
||||
}
|
||||
a:hover .link-arrow {
|
||||
transform: translateX(5px);
|
||||
}
|
||||
.axolotl-container {
|
||||
text-align: center;
|
||||
margin: 30px 0;
|
||||
filter: invert(1) hue-rotate(180deg);
|
||||
}
|
||||
.axolotl-container img {
|
||||
max-width: 300px;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.3);
|
||||
border: 1px solid #000000;
|
||||
}
|
||||
</style>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Rei-KTO-24B</title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>Rei-KTO-24B</h1>
|
||||
</div>
|
||||
<div class="info">
|
||||
<img src="https://cdn-uploads.huggingface.co/production/uploads/66c26b6fb01b19d8c3c2467b/EFNIBpVv1OITZl_su5DyR.png" alt="Model banner">
|
||||
<div style="text-align: center;">
|
||||
<div class="creator-section">
|
||||
<div class="creator-badge">
|
||||
<span class="creator-label">Created by</span>
|
||||
<a href="https://huggingface.co/Delta-Vector" target="_blank" class="creator-link">
|
||||
<span class="creator-name">Delta-Vector</span>
|
||||
<span class="creator-arrow">→</span>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="model-info">
|
||||
<h2>Model Information</h2>
|
||||
<div class="info-card">
|
||||
<div class="info-header">
|
||||
<h3>Rei-KTO-24B</h3>
|
||||
<div class="model-tags">
|
||||
<span class="model-tag">KTO enhanced</span>
|
||||
<span class="model-tag">Painted Fantasy Finetune</span>
|
||||
<span class="model-tag">Creative Prose</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="model-description">
|
||||
<p>A model meant to replicate the style and prose of the Anthropic Claude models, Opus and Sonnet. This model is meant for roleplaying and creative writing; it has some nice smarts without being too sloppy - it's pretty good. It was trained in two steps: first SFT-trained on Zerofata's PaintedFantasy, which I found great at anime-otaku-esque characters, and then KTO'd to improve coherency and instruction following.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h2>Quantized Versions</h2>
|
||||
<div class="info-card">
|
||||
<div class="model-composition">
|
||||
<h4>Available Downloads</h4>
|
||||
<ul class="composition-list">
|
||||
<li><span class="model-component"><a href="https://huggingface.co/mradermacher/Rei-24B-KTO-GGUF" target="_blank">GGUF Format</a></span>For use with llama.cpp &amp; forks (Ty Mradermacher &lt;3)</li>
|
||||
<li><span class="model-component"><a href="https://huggingface.co/DeathGodlike/Rei-24B-KTO_EXL3" target="_blank">EXL3 Format</a></span>For use with TabbyAPI</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h2>Prompting</h2>
|
||||
<p>The model is tuned with V7 Tekken formatting. A typical input would look like this:</p>
|
||||
<pre><code><span style="color:#FF0033;">&lt;s&gt;[SYSTEM_PROMPT]system_prompt[/SYSTEM_PROMPT][INST]Hi there![/INST]Nice to meet you!&lt;/s&gt;[INST]Can I ask a question?[/INST]</span>
|
||||
</code></pre>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h2>Training</h2>
|
||||
<p>Training was done in two steps: SFT, then KTO.</p>
|
||||
<div class="metrics-section">
|
||||
<details>
|
||||
<summary>Access Configs</summary>
|
||||
<pre><code> SFT: https://wandb.ai/new-eden/Painted-Fantasy-Rei/artifacts/axolotl-config/config-u7to9d5q/v0/files/axolotl_config_f0p7vnaf.yml
|
||||
KTO : https://wandb.ai/new-eden/Painted-Rei/artifacts/axolotl-config/config-8n37w77c/v0/files/axolotl_config_hvrd2tzn.yml
|
||||
</code></pre>
|
||||
</details>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h2>Training Details</h2>
|
||||
<p>The training was done for 2 epochs using 8 x A100s for 24 hours.</p>
|
||||
<p style="text-align: center; margin-top: 20px;">
|
||||
<div class="axolotl-container">
|
||||
<a href="https://github.com/OpenAccess-AI-Collective/axolotl" target="_blank">
|
||||
<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl">
|
||||
</a>
|
||||
</div>
|
||||
<div class="section">
|
||||
<h2>Credits</h2>
|
||||
<p>Thank you to <a href="https://huggingface.co/lucyknada">Lucy Knada</a>, <a href="https://huggingface.co/Ateron">Ateron</a>, <a href="https://huggingface.co/AliCat2">Alicat</a>, <a href="https://huggingface.co/intervitens">Intervitens</a>, <a href="https://huggingface.co/cgato">Cgato</a>, <a href="https://huggingface.co/kubernetes-bad">Kubernetes Bad</a> and the rest of <a href="https://huggingface.co/anthracite-org">Anthracite</a>.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
51
chat_template.jinja
Normal file
51
chat_template.jinja
Normal file
@@ -0,0 +1,51 @@
|
||||
{%- set today = strftime_now("%Y-%m-%d") %}
|
||||
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
|
||||
|
||||
{{- bos_token }}
|
||||
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{%- set system_message = messages[0]['content'] %}
|
||||
{%- else %}
|
||||
{%- set system_message = messages[0]['content'][0]['text'] %}
|
||||
{%- endif %}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = default_system_message %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- endif %}
|
||||
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
||||
|
||||
{%- for message in loop_messages %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||
{%- else %}
|
||||
{{- '[INST]' }}
|
||||
{%- for block in message['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] in ['image', 'image_url'] %}
|
||||
{{- '[IMG]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- '[/INST]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'system' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- else %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] + eos_token }}
|
||||
{%- else %}
|
||||
{{- message['content'][0]['text'] + eos_token }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only user, system and assistant roles are supported!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
26
config.json
Normal file
26
config.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 32768,
|
||||
"max_position_embeddings": 131072,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 40,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.53.1",
|
||||
"use_cache": true,
|
||||
"vocab_size": 131072
|
||||
}
|
||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 2,
|
||||
"transformers_version": "4.53.1"
|
||||
}
|
||||
3
model-00001-of-00010.safetensors
Normal file
3
model-00001-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b9b4cc3c9c276caa374f2eb7c82a838ccc5f1cc3d426267ad4adefd934a97b4d
|
||||
size 4781571736
|
||||
3
model-00002-of-00010.safetensors
Normal file
3
model-00002-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cab8dcb3bbede0455b28dfb80692414db91fb2ae6e79342b98ca7fbecae79d3e
|
||||
size 4781592784
|
||||
3
model-00003-of-00010.safetensors
Normal file
3
model-00003-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:db1cf23e5c2de4c57abb13800c847facb7ea914733ccf8c5e3fc7660a0837b22
|
||||
size 4781592800
|
||||
3
model-00004-of-00010.safetensors
Normal file
3
model-00004-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b35b71354f8f860ed53fd1c578590e99eb4b3954920ce5067a781f9087168295
|
||||
size 4886471600
|
||||
3
model-00005-of-00010.safetensors
Normal file
3
model-00005-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fcf983ad514ffeef4d0d3559358882fd54e97017002aaffe6bb2f4349fbbf2a0
|
||||
size 4781592824
|
||||
3
model-00006-of-00010.safetensors
Normal file
3
model-00006-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5be6481070f5e0a58908791c9a15977bb573025f250ac0145874250d4768a6b1
|
||||
size 4781592816
|
||||
3
model-00007-of-00010.safetensors
Normal file
3
model-00007-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fe61b97b759fe04cf0aafa22ee1bde2612da688147b16328f72a36d3b3c9cac8
|
||||
size 4886471600
|
||||
3
model-00008-of-00010.safetensors
Normal file
3
model-00008-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5694220fb02c1d8a118d474836d902c3aac10628a9663e2bf6721dc59bfdf039
|
||||
size 4781592824
|
||||
3
model-00009-of-00010.safetensors
Normal file
3
model-00009-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:12d552caf65da1ef090b8bbfdbeb8e4218771f5d7f517d9951b64db6798f79a2
|
||||
size 4781592816
|
||||
3
model-00010-of-00010.safetensors
Normal file
3
model-00010-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d93d37682ef34f2c58fe073d5ada793e7c54526574f61f64fd194a6f36a3f802
|
||||
size 3900777072
|
||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 23572403200,
|
||||
"total_size": 47144806400
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00010-of-00010.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.norm.weight": "model-00010-of-00010.safetensors"
|
||||
}
|
||||
}
|
||||
1032
special_tokens_map.json
Normal file
1032
special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b76085f9923309d873994d444989f7eb6ec074b06f25b58f1e8d7b7741070949
|
||||
size 17078037
|
||||
9019
tokenizer_config.json
Normal file
9019
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user