初始化项目,由ModelHub XC社区提供模型
Model: Vortex5/Starlit-Shadow-12B Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
281
README.md
Normal file
281
README.md
Normal file
@@ -0,0 +1,281 @@
|
||||
---
|
||||
base_model:
|
||||
- Vortex5/Red-Synthesis-12B
|
||||
- Retreatcost/KansenSakura-Erosion-RP-12b
|
||||
- Vortex5/NoctyxCosma-12B
|
||||
- Vortex5/Crimson-Twilight-12B
|
||||
library_name: transformers
|
||||
tags:
|
||||
- mergekit
|
||||
- merge
|
||||
- roleplay
|
||||
---
|
||||
<section class="shell cosmic-theme">
|
||||
|
||||
|
||||
<div class="title-frame">
|
||||
<div class="title-block wide">
|
||||
<h2 class="hero-title">Starlit-Shadow-12B</h2>
|
||||
</div>
|
||||
|
||||
<div class="image-slot inset">
|
||||
<img src="https://cdn-uploads.huggingface.co/production/uploads/6669a3a617b838fda45637b8/xkj2QnxaJLjkgl9rKN7Io.png" alt="Starlit-Shadow-12B">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="shadow-divider"></div>
|
||||
|
||||
|
||||
<section class="pane blended">
|
||||
<h2 class="pane-title">Overview<span class="underline"></span></h2>
|
||||
|
||||
<p>
|
||||
<strong style="color:#fff">Starlit-Shadow-12B</strong> was created through a
|
||||
<strong>multi-stage merge</strong> involving
|
||||
<a href="https://huggingface.co/Vortex5/Red-Synthesis-12B">Red-Synthesis-12B</a>,
|
||||
<a href="https://huggingface.co/Retreatcost/KansenSakura-Erosion-RP-12b">KansenSakura-Erosion-RP-12B</a>,
|
||||
<a href="https://huggingface.co/Vortex5/NoctyxCosma-12B">NoctyxCosma-12B</a>,
|
||||
and
|
||||
<a href="https://huggingface.co/Vortex5/Crimson-Twilight-12B">Crimson-Twilight-12B</a>.
|
||||
</p>
|
||||
|
||||
|
||||
<details class="expando">
|
||||
<summary>Multi-stage merge configuration</summary>
|
||||
<pre>
|
||||
name: First
|
||||
models:
|
||||
- model: Vortex5/Red-Synthesis-12B
|
||||
merge_method: sm2f
|
||||
base_model: Retreatcost/KansenSakura-Erosion-RP-12b
|
||||
dtype: bfloat16
|
||||
tokenizer:
|
||||
source: Retreatcost/KansenSakura-Erosion-RP-12b
|
||||
---
|
||||
name: Second
|
||||
models:
|
||||
- model: Vortex5/NoctyxCosma-12B
|
||||
merge_method: sm2f
|
||||
base_model: Vortex5/Crimson-Twilight-12B
|
||||
dtype: bfloat16
|
||||
tokenizer:
|
||||
source: Vortex5/Crimson-Twilight-12B
|
||||
---
|
||||
models:
|
||||
- model: First
|
||||
- model: Second
|
||||
merge_method: karcher
|
||||
dtype: bfloat16
|
||||
parameters:
|
||||
tol: 1e-9
|
||||
max_iter: 1000
|
||||
tokenizer:
|
||||
source: Vortex5/Crimson-Twilight-12B
|
||||
</pre>
|
||||
</details>
|
||||
</section>
|
||||
|
||||
<section class="pane blended">
|
||||
<h2 class="pane-title">Intended Use<span class="underline"></span></h2>
|
||||
|
||||
<div class="tag-row">
|
||||
<div class="tagbox">🎭 Roleplay</div>
|
||||
<div class="tagbox">📜 Storytelling</div>
|
||||
<div class="tagbox">🌌 Creative Writing</div>
|
||||
</div>
|
||||
|
||||
</section>
|
||||
|
||||
<div class="shadow-divider"></div>
|
||||
|
||||
</section>
|
||||
|
||||
<style>
|
||||
|
||||
.shell.cosmic-theme {
|
||||
max-width: 1050px;
|
||||
margin: 40px auto;
|
||||
padding: 42px 26px;
|
||||
|
||||
background: radial-gradient(
|
||||
circle at 50% 60%,
|
||||
#0b0b14 0%,
|
||||
#07070d 45%,
|
||||
#020205 100%
|
||||
);
|
||||
|
||||
border: 3px solid rgba(255, 80, 120, 0.75);
|
||||
border-radius: 18px;
|
||||
|
||||
box-shadow:
|
||||
0 0 60px rgba(0, 0, 0, 0.95),
|
||||
inset 0 0 60px rgba(0, 0, 0, 0.75);
|
||||
|
||||
color: #f2f2f6;
|
||||
font-family: "Inter", system-ui, sans-serif;
|
||||
}
|
||||
|
||||
.title-frame {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.title-block.wide {
|
||||
margin: 0 auto 22px;
|
||||
padding: 22px 0;
|
||||
|
||||
background: linear-gradient(
|
||||
135deg,
|
||||
rgba(40, 10, 25, 0.95),
|
||||
rgba(25, 8, 18, 0.92),
|
||||
rgba(15, 5, 12, 0.90)
|
||||
);
|
||||
|
||||
border: 3px solid rgba(255, 80, 120, 0.65);
|
||||
border-radius: 14px;
|
||||
|
||||
box-shadow:
|
||||
0 0 22px rgba(255, 80, 120, 0.35),
|
||||
inset 0 0 30px rgba(255, 60, 110, 0.35);
|
||||
}
|
||||
|
||||
.hero-title {
|
||||
margin: 0;
|
||||
font-family: "Cinzel Decorative", "Cinzel", serif;
|
||||
font-size: clamp(3rem, 5vw, 3.8rem);
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.05em;
|
||||
|
||||
color: #ff8ccf;
|
||||
|
||||
text-shadow:
|
||||
0 0 10px rgba(255, 120, 190, 0.55),
|
||||
0 0 24px rgba(255, 80, 140, 0.45);
|
||||
}
|
||||
|
||||
.image-slot img {
|
||||
width: 100%;
|
||||
max-width: 880px;
|
||||
|
||||
border: 3px solid rgba(255, 100, 160, 0.85);
|
||||
border-radius: 10px;
|
||||
|
||||
box-shadow:
|
||||
0 0 18px rgba(255, 100, 160, 0.45),
|
||||
inset 0 0 10px rgba(255, 80, 120, 0.35);
|
||||
}
|
||||
|
||||
.shadow-divider {
|
||||
width: 92%;
|
||||
height: 10px;
|
||||
margin: 36px auto;
|
||||
|
||||
background: linear-gradient(
|
||||
to right,
|
||||
rgba(255, 120, 180, 0.95),
|
||||
rgba(90, 30, 60, 0.85),
|
||||
rgba(255, 120, 180, 0.95)
|
||||
);
|
||||
|
||||
clip-path: polygon(
|
||||
0% 50%,
|
||||
5% 0%,
|
||||
95% 0%,
|
||||
100% 50%,
|
||||
95% 100%,
|
||||
5% 100%
|
||||
);
|
||||
|
||||
box-shadow:
|
||||
0 0 18px rgba(255, 120, 180, 0.55);
|
||||
}
|
||||
|
||||
.pane {
|
||||
margin: 28px 0;
|
||||
padding: 28px;
|
||||
|
||||
border-radius: 8px;
|
||||
border: 3px solid rgba(255, 90, 140, 0.7);
|
||||
|
||||
background: linear-gradient(
|
||||
145deg,
|
||||
rgba(30, 8, 20, 0.92),
|
||||
rgba(18, 6, 14, 0.88),
|
||||
rgba(12, 4, 10, 0.82)
|
||||
);
|
||||
|
||||
box-shadow:
|
||||
0 0 22px rgba(255, 90, 140, 0.25),
|
||||
inset 0 0 28px rgba(255, 70, 120, 0.35);
|
||||
}
|
||||
|
||||
.pane-title {
|
||||
font-family: "Cinzel Decorative", serif;
|
||||
font-size: 2.4rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.04em;
|
||||
|
||||
color: #ff9fdc;
|
||||
|
||||
text-shadow:
|
||||
0 0 12px rgba(255, 140, 200, 0.55),
|
||||
0 0 28px rgba(255, 90, 150, 0.45);
|
||||
|
||||
padding-bottom: 14px;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.pane-title::after {
|
||||
content: "";
|
||||
position: absolute;
|
||||
left: 0;
|
||||
bottom: 0;
|
||||
width: 100%;
|
||||
height: 3px;
|
||||
|
||||
background: linear-gradient(
|
||||
to right,
|
||||
rgba(255, 150, 210, 0.95),
|
||||
rgba(0, 0, 0, 0)
|
||||
);
|
||||
}
|
||||
|
||||
.tag-row {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 14px;
|
||||
flex-wrap: wrap;
|
||||
margin: 14px 0;
|
||||
}
|
||||
|
||||
.tagbox {
|
||||
padding: 10px 18px;
|
||||
font-size: 1.15rem;
|
||||
|
||||
background: rgba(20, 8, 14, 0.85);
|
||||
border: 2px solid rgba(255, 120, 180, 0.65);
|
||||
border-radius: 10px;
|
||||
|
||||
box-shadow:
|
||||
0 0 12px rgba(255, 120, 180, 0.35);
|
||||
}
|
||||
|
||||
details.expando {
|
||||
margin-top: 16px;
|
||||
background: rgba(10, 4, 8, 0.85);
|
||||
border: 2px solid rgba(255, 100, 160, 0.45);
|
||||
}
|
||||
|
||||
details summary {
|
||||
padding: 12px;
|
||||
font-family: "JetBrains Mono", monospace;
|
||||
font-size: 0.85rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
pre {
|
||||
background: rgba(6, 2, 4, 0.85);
|
||||
color: #ffd6ef;
|
||||
padding: 18px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
</style>
|
||||
26
config.json
Normal file
26
config.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 1024000,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 40,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.3",
|
||||
"use_cache": false,
|
||||
"vocab_size": 131075
|
||||
}
|
||||
15
mergekit_config.yml
Normal file
15
mergekit_config.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
dtype: bfloat16
|
||||
merge_method: karcher
|
||||
modules:
|
||||
default:
|
||||
slices:
|
||||
- sources:
|
||||
- layer_range: [0, 40]
|
||||
model: ./intermediates/First
|
||||
- layer_range: [0, 40]
|
||||
model: ./intermediates/Second
|
||||
parameters:
|
||||
max_iter: 1000.0
|
||||
tol: 1.0e-09
|
||||
tokenizer:
|
||||
source: Vortex5/Crimson-Twilight-12B
|
||||
3
model-00001-of-00005.safetensors
Normal file
3
model-00001-of-00005.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e306902787eb89bb935aaefc14743eb6b76daa7c063d90bb6fdbc098317023a4
|
||||
size 4865550776
|
||||
3
model-00002-of-00005.safetensors
Normal file
3
model-00002-of-00005.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:18601ddae8410730c8ffe6f28d95098e29f690a35111428aa2941b52126aacb7
|
||||
size 4907529456
|
||||
3
model-00003-of-00005.safetensors
Normal file
3
model-00003-of-00005.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9d3cb4bd4d2744d6a25fbfe71fe6322ecef61d95909df5a35a9bf879236eaf94
|
||||
size 4907529464
|
||||
3
model-00004-of-00005.safetensors
Normal file
3
model-00004-of-00005.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eb6f9bd4ff9f6a8bc0dded1feef50610bea6b6210691fd8bec9bdb70f3e03535
|
||||
size 4907529456
|
||||
3
model-00005-of-00005.safetensors
Normal file
3
model-00005-of-00005.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c860a25e49ca63a1fcf2349e2c6a950fe7cce34a31f08ce5bbadc39893f78c12
|
||||
size 4907529392
|
||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 24495626240,
|
||||
"mergekit_version": "0.1.4"
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00001-of-00005.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00001-of-00005.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
|
||||
"model.norm.weight": "model-00005-of-00005.safetensors"
|
||||
}
|
||||
}
|
||||
23
special_tokens_map.json
Normal file
23
special_tokens_map.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:33cb74e9ca1c0323b1be5f7e367b96bae218c22d0c4ed5b0edeec39f3a8755c3
|
||||
size 17078856
|
||||
8042
tokenizer_config.json
Normal file
8042
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user