初始化项目,由ModelHub XC社区提供模型

Model: Naphula/Salamander-24B-v1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-29 15:10:16 +08:00
commit 3ee8adb70f
19 changed files with 10999 additions and 0 deletions

36
.gitattributes vendored Normal file
View File

@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

184
README.md Normal file
View File

@@ -0,0 +1,184 @@
---
base_model:
- Darkhn/Magistral-2509-24B-Text-Only
- ReadyArt/Broken-Tutu-24B-Transgression-v2.0
- MuXodious/Tiamat-24B-Magistral-PaperWitch-heresy
- llmfan46/MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
- ReadyArt/Dark-Nexus-24B-v2.0
- ReadyArt/4.2.0-Broken-Tutu-24b
- Undi95/MistralThinker-v1.1
- TheDrummer/Magidonia-24B-v4.3
- TheDrummer/Rivermind-24B-v1
- MuXodious/Hearthfire-24B-absolute-heresy
- Darkhn/M3.2-24B-Animus-V7.1
- zerofata/MS3.2-PaintedFantasy-v3-24B
- CrucibleLab/M3.2-24B-Loki-V1.3
- aixonlab/Eurydice-24b-v3.5
- Nabbers1999/MS-24B-Bathory-GRPO
- Naphula/BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
- allura-forge/ms32-final-TEXTONLY
- TheDrummer/Precog-24B-v1
- dphn/Dolphin-Mistral-24B-Venice-Edition
- TroyDoesAI/BlackSheep-24B
- trashpanda-org/MS3.2-24B-Mullein-v2
- TheDrummer/Cydonia-24B-v4.3
- zerofata/MS3.2-PaintedFantasy-v2-24B
language: eng
library_name: transformers
license: apache-2.0
tags:
- mergekit
- merge
- mistral
- della
widget:
- text: "Salamander-24B-v1"
output:
url: https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/3KVMnxiGakKyYgcqHIa8B.jpeg
---
# Salamander 24B v1
![Salamander](https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/3KVMnxiGakKyYgcqHIa8B.jpeg)
This is **Checkpoint 82**, a new [`della`](https://arxiv.org/abs/2406.11617) merge combining several 2501, 2506, and 2509 models, with fallen mistral 2503 also sprinkled in.
No refusals were observed in the initial tests. The model should not require ablation or jailbreaks.
```yaml
architecture: MistralForCausalLM
models:
## BASE ##
- model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
## 2501 ##
- model: B:\24B\!models--ReadyArt--4.2.0-Broken-Tutu-24b
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--ReadyArt--Broken-Tutu-24B-Transgression-v2.0
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\PrivateMerge29 # This merge is no longer available on HF
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Nabbers1999--MS-24B-Bathory-GRPO
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--dphn--Dolphin-Mistral-24B-Venice-Edition
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TroyDoesAI--BlackSheep-24B
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--aixonlab--Eurydice-24b-v3.5
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Undi95--MistralThinker-v1.1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
## 2503 ##
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
## 2506 ##
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v2-24B
parameters:
weight: 0.1
weight: 0.09
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--trashpanda-org--MS3.2-24B-Mullein-v2
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--allura-forge--ms32-final-TEXTONLY
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--CrucibleLab--M3.2-24B-Loki-V1.3
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Darkhn--M3.2-24B-Animus-V7.1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\MuXodious--Hearthfire-24B-absolute-heresy
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--ReadyArt--Dark-Nexus-24B-v2.0
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
## 2509##
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\llmfan46--MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v3-24B
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\MuXodious--Tiamat-24B-Magistral-PaperWitch-heresy\textonly
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
merge_method: della
base_model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
parameters:
lambda: 1.0
normalize: false
int8_mask: false
rescale: true
tokenizer:
source: union
dtype: float32
out_dtype: bfloat16
name: C82
```

112
chat_template.jinja Normal file
View File

@@ -0,0 +1,112 @@
{%- set default_system_message = 'First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.\n\nYour thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.' %}
{{- bos_token }}
{#- Extract system message if present -#}
{%- if messages[0]['role'] == 'system' %}
{%- if messages[0]['content'] is string %}
{%- set raw_system_message = messages[0]['content'] %}
{%- else %}
{%- set raw_system_message = messages[0]['content'][0]['text'] %}
{%- endif %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set raw_system_message = "" %}
{%- set loop_messages = messages %}
{%- endif %}
{#- Detect THINK flag by searching for exact phrase "/think" -#}
{%- if "/think" in raw_system_message %}
{%- set THINK = True %}
{%- else %}
{%- set THINK = False %}
{%- endif %}
{#- Apply logic depending on THINK flag -#}
{%- if THINK %}
{%- if raw_system_message|length > 0 %}
{%- set system_message = default_system_message + "\n\n" + raw_system_message %}
{%- else %}
{%- set system_message = default_system_message %}
{%- endif %}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
{%- else %}
{%- if raw_system_message|length > 0 %}
{{- '[SYSTEM_PROMPT]' + raw_system_message + '[/SYSTEM_PROMPT]' }}
{%- endif %}
{%- endif %}
{#- Tool description appended ONLY to last user message. Edits made by Unsloth #}
{%- set tools_description = "" %}
{%- set has_tools = false %}
{%- if tools is defined and tools is not none and tools|length > 0 %}
{%- set has_tools = true %}
{%- set tools_description = "[AVAILABLE_TOOLS]" + (tools | tojson) + "[/AVAILABLE_TOOLS]" %}
{{- tools_description }}
{%- endif %}
{%- for message in loop_messages %}
{%- if message['role'] == 'user' %}
{%- if message['content'] is string %}
{{- '[INST]' + message['content'] + '[/INST]' }}
{%- else %}
{{- '[INST]' }}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{%- if block['text'] is defined %}
{{- block['text'] }}
{%- else %}
{{- block['content'] }}
{%- endif %}
{%- elif block['type'] in ['image', 'image_url'] %}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Only text and image blocks are supported in message content!') }}
{%- endif %}
{%- endfor %}
{{- '[/INST]' }}
{%- endif %}
{%- elif message['role'] == 'system' %}
{%- if message['content'] is string %}
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
{%- else %}
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
{%- endif %}
{%- elif message['role'] == 'assistant' %}
{%- if message['content'] is string %}
{{- message['content'] }}
{%- elif message['content'] is iterable %}
{{- message['content'][0]['text'] }}
{%- endif %}
{%- if message['tool_calls'] is defined and message['tool_calls'] is not none %}
{%- for tool in message['tool_calls'] %}
{%- set arguments = tool['function']['arguments'] %}
{%- if arguments is not string %}
{%- set arguments = arguments|tojson %}
{%- endif %}
{{- "[TOOL_CALLS]" + tool['function']['name'] + "[ARGS]" + arguments }}
{%- endfor %}
{%- endif %}
{{- eos_token }}
{%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
{%- if message.content is defined and message.content.content is defined %}
{%- set content = message.content.content %}
{%- else %}
{%- set content = message.content %}
{%- endif %}
{{- "[TOOL_RESULTS]" + content|string + "[/TOOL_RESULTS]" }}
{%- else %}
{{- raise_exception('Only user, system, assistant and tool roles are supported!') }}
{%- endif %}
{%- endfor %}
{#- Licensed under the Apache License, Version 2.0 (the "License") #}

27
config.json Normal file
View File

@@ -0,0 +1,27 @@
{
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 5120,
"initializer_range": 0.02,
"intermediate_size": 32768,
"max_position_embeddings": 131072,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 40,
"num_key_value_heads": 8,
"pad_token_id": 11,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"transformers_version": "4.56.1",
"use_cache": true,
"vocab_size": 131078
}

135
mergekit_config.yml Normal file
View File

@@ -0,0 +1,135 @@
architecture: MistralForCausalLM
models:
## BASE ##
- model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
## 2501 ##
- model: B:\24B\!models--ReadyArt--4.2.0-Broken-Tutu-24b
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--ReadyArt--Broken-Tutu-24B-Transgression-v2.0
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\PrivateMerge29 # This merge is no longer available on HF
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Nabbers1999--MS-24B-Bathory-GRPO
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--dphn--Dolphin-Mistral-24B-Venice-Edition
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TroyDoesAI--BlackSheep-24B
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--aixonlab--Eurydice-24b-v3.5
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Undi95--MistralThinker-v1.1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
## 2503 ##
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
## 2506 ##
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v2-24B
parameters:
weight: 0.1
weight: 0.09
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--trashpanda-org--MS3.2-24B-Mullein-v2
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--allura-forge--ms32-final-TEXTONLY
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--CrucibleLab--M3.2-24B-Loki-V1.3
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--Darkhn--M3.2-24B-Animus-V7.1
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\MuXodious--Hearthfire-24B-absolute-heresy
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--ReadyArt--Dark-Nexus-24B-v2.0
parameters:
weight: 0.1
density: 0.9
epsilon: 0.09
## 2509##
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\llmfan46--MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v3-24B
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
- model: B:\24B\MuXodious--Tiamat-24B-Magistral-PaperWitch-heresy\textonly
parameters:
weight: 0.2
density: 0.9
epsilon: 0.09
merge_method: della
base_model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
parameters:
lambda: 1.0
normalize: false
int8_mask: false
rescale: true
tokenizer:
source: union
dtype: float32
out_dtype: bfloat16
name: C82

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:805ec7f4d6f6dde9cb8231997b7d65a8167d5cb32896910989a3f2ac5a7b57f0
size 4907512192

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1a38133c55c8092da1bea125c56e01ea36a0ed820d7a4cba8ecb345b430e95a9
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2b1b235607963d595323f7d85031bda091758762162cf4139e958aa56d445962
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d4e0152f669181d4de331c79ccc0b3012530f94c420fff2a280d39ad64a15da
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5dc389bc0c9db19e2c1310df210c1b5e93511e8164ba1fb643a5acf84f05e8e
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ba33b8a665eebc602270513ec31f7e3b88d0c9092708acee450084d867b234a7
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1425b4386fb86555348f59efd12ad1913e79fec128217e06748b7b156472a6d3
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25b9b8e698055133e1b6472c1f856bd9cdd209c2ebd711ec694f8d3ce4d0d1a0
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e7f04a7ec63086da6ff1baf46ee7a22e1ba2877c6863e125e0f3c51dd451815
size 4781592800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4e7b274eca8054c784f777a428dcac9fb1ff03203c8760802aee1e4d84c673b
size 3774959456

View File

@@ -0,0 +1,371 @@
{
"metadata": {
"total_size": 47144929280,
"mergekit_version": "0.1.4"
},
"weight_map": {
"lm_head.weight": "model-00001-of-00010.safetensors",
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.22.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.34.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.norm.weight": "model-00010-of-00010.safetensors"
}
}

1032
special_tokens_map.json Normal file

File diff suppressed because it is too large Load Diff

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:439f2cd468feda92550d47e24b9d47512941082d9558561bc6f92a83ee493b43
size 17079133

9069
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff