初始化项目,由ModelHub XC社区提供模型
Model: Naphula/Salamander-24B-v1 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
184
README.md
Normal file
184
README.md
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
---
|
||||||
|
base_model:
|
||||||
|
- Darkhn/Magistral-2509-24B-Text-Only
|
||||||
|
- ReadyArt/Broken-Tutu-24B-Transgression-v2.0
|
||||||
|
- MuXodious/Tiamat-24B-Magistral-PaperWitch-heresy
|
||||||
|
- llmfan46/MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
|
||||||
|
- ReadyArt/Dark-Nexus-24B-v2.0
|
||||||
|
- ReadyArt/4.2.0-Broken-Tutu-24b
|
||||||
|
- Undi95/MistralThinker-v1.1
|
||||||
|
- TheDrummer/Magidonia-24B-v4.3
|
||||||
|
- TheDrummer/Rivermind-24B-v1
|
||||||
|
- MuXodious/Hearthfire-24B-absolute-heresy
|
||||||
|
- Darkhn/M3.2-24B-Animus-V7.1
|
||||||
|
- zerofata/MS3.2-PaintedFantasy-v3-24B
|
||||||
|
- CrucibleLab/M3.2-24B-Loki-V1.3
|
||||||
|
- aixonlab/Eurydice-24b-v3.5
|
||||||
|
- Nabbers1999/MS-24B-Bathory-GRPO
|
||||||
|
- Naphula/BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
|
||||||
|
- allura-forge/ms32-final-TEXTONLY
|
||||||
|
- TheDrummer/Precog-24B-v1
|
||||||
|
- dphn/Dolphin-Mistral-24B-Venice-Edition
|
||||||
|
- TroyDoesAI/BlackSheep-24B
|
||||||
|
- trashpanda-org/MS3.2-24B-Mullein-v2
|
||||||
|
- TheDrummer/Cydonia-24B-v4.3
|
||||||
|
- zerofata/MS3.2-PaintedFantasy-v2-24B
|
||||||
|
language: eng
|
||||||
|
library_name: transformers
|
||||||
|
license: apache-2.0
|
||||||
|
tags:
|
||||||
|
- mergekit
|
||||||
|
- merge
|
||||||
|
- mistral
|
||||||
|
- della
|
||||||
|
widget:
|
||||||
|
- text: "Salamander-24B-v1"
|
||||||
|
output:
|
||||||
|
url: https://cdn-uploads.huggingface.co/production/uploads/68e840caa318194c44ec2a04/3KVMnxiGakKyYgcqHIa8B.jpeg
|
||||||
|
---
|
||||||
|
|
||||||
|
# Salamander 24B v1
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
This is **Checkpoint 82**, a new [`della`](https://arxiv.org/abs/2406.11617) merge combining several models from the 2501, 2506, and 2509 Mistral releases, with Fallen Mistral Small 3.1 (2503) also mixed in.
|
||||||
|
|
||||||
|
No refusals were observed in the initial tests. The model should not require ablation or jailbreaks.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
architecture: MistralForCausalLM
|
||||||
|
models:
|
||||||
|
## BASE ##
|
||||||
|
- model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
|
||||||
|
## 2501 ##
|
||||||
|
- model: B:\24B\!models--ReadyArt--4.2.0-Broken-Tutu-24b
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--ReadyArt--Broken-Tutu-24B-Transgression-v2.0
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\PrivateMerge29 # This merge is no longer available on HF
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Nabbers1999--MS-24B-Bathory-GRPO
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--dphn--Dolphin-Mistral-24B-Venice-Edition
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TroyDoesAI--BlackSheep-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--aixonlab--Eurydice-24b-v3.5
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Undi95--MistralThinker-v1.1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2503 ##
|
||||||
|
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2506 ##
|
||||||
|
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v2-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
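weight: 0.09 # NOTE(review): duplicate "weight" key; every other entry has "density: 0.9" here, so this is likely a typo. Kept as-is because it records the config that was actually used; confirm.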
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--trashpanda-org--MS3.2-24B-Mullein-v2
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--allura-forge--ms32-final-TEXTONLY
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--CrucibleLab--M3.2-24B-Loki-V1.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Darkhn--M3.2-24B-Animus-V7.1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\MuXodious--Hearthfire-24B-absolute-heresy
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--ReadyArt--Dark-Nexus-24B-v2.0
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2509 ##
|
||||||
|
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\llmfan46--MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v3-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\MuXodious--Tiamat-24B-Magistral-PaperWitch-heresy\textonly
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
merge_method: della
|
||||||
|
base_model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
|
||||||
|
parameters:
|
||||||
|
lambda: 1.0
|
||||||
|
normalize: false
|
||||||
|
int8_mask: false
|
||||||
|
rescale: true
|
||||||
|
tokenizer:
|
||||||
|
source: union
|
||||||
|
dtype: float32
|
||||||
|
out_dtype: bfloat16
|
||||||
|
name: C82
|
||||||
|
```
|
||||||
112
chat_template.jinja
Normal file
112
chat_template.jinja
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
{%- set default_system_message = 'First draft your thinking process (inner monologue) until you arrive at a response. Format your response using Markdown, and use LaTeX for any mathematical equations. Write both your thoughts and the response in the same language as the input.\n\nYour thinking process must follow the template below:[THINK]Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate the response. Use the same language as the input.[/THINK]Here, provide a self-contained response.' %}
|
||||||
|
|
||||||
|
{{- bos_token }}
|
||||||
|
|
||||||
|
{#- Extract system message if present -#}
|
||||||
|
{%- if messages[0]['role'] == 'system' %}
|
||||||
|
{%- if messages[0]['content'] is string %}
|
||||||
|
{%- set raw_system_message = messages[0]['content'] %}
|
||||||
|
{%- else %}
|
||||||
|
{%- set raw_system_message = messages[0]['content'][0]['text'] %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- set loop_messages = messages[1:] %}
|
||||||
|
{%- else %}
|
||||||
|
{%- set raw_system_message = "" %}
|
||||||
|
{%- set loop_messages = messages %}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{#- Detect THINK flag: enabled when the system message contains the substring "/think" -#}
|
||||||
|
{%- if "/think" in raw_system_message %}
|
||||||
|
{%- set THINK = True %}
|
||||||
|
{%- else %}
|
||||||
|
{%- set THINK = False %}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{#- Apply logic depending on THINK flag -#}
|
||||||
|
{%- if THINK %}
|
||||||
|
{%- if raw_system_message|length > 0 %}
|
||||||
|
{%- set system_message = default_system_message + "\n\n" + raw_system_message %}
|
||||||
|
{%- else %}
|
||||||
|
{%- set system_message = default_system_message %}
|
||||||
|
{%- endif %}
|
||||||
|
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
||||||
|
{%- else %}
|
||||||
|
{%- if raw_system_message|length > 0 %}
|
||||||
|
{{- '[SYSTEM_PROMPT]' + raw_system_message + '[/SYSTEM_PROMPT]' }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
|
||||||
|
{#- Tool descriptions are emitted once here, before the conversation messages (they are not appended to the last user message). Edits made by Unsloth #}
|
||||||
|
{%- set tools_description = "" %}
|
||||||
|
{%- set has_tools = false %}
|
||||||
|
|
||||||
|
{%- if tools is defined and tools is not none and tools|length > 0 %}
|
||||||
|
{%- set has_tools = true %}
|
||||||
|
{%- set tools_description = "[AVAILABLE_TOOLS]" + (tools | tojson) + "[/AVAILABLE_TOOLS]" %}
|
||||||
|
{{- tools_description }}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{%- for message in loop_messages %}
|
||||||
|
{%- if message['role'] == 'user' %}
|
||||||
|
|
||||||
|
{%- if message['content'] is string %}
|
||||||
|
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||||
|
{%- else %}
|
||||||
|
{{- '[INST]' }}
|
||||||
|
{%- for block in message['content'] %}
|
||||||
|
{%- if block['type'] == 'text' %}
|
||||||
|
{%- if block['text'] is defined %}
|
||||||
|
{{- block['text'] }}
|
||||||
|
{%- else %}
|
||||||
|
{{- block['content'] }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- elif block['type'] in ['image', 'image_url'] %}
|
||||||
|
{{- '[IMG]' }}
|
||||||
|
{%- else %}
|
||||||
|
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endfor %}
|
||||||
|
{{- '[/INST]' }}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{%- elif message['role'] == 'system' %}
|
||||||
|
{%- if message['content'] is string %}
|
||||||
|
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
||||||
|
{%- else %}
|
||||||
|
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{%- elif message['role'] == 'assistant' %}
|
||||||
|
{%- if message['content'] is string %}
|
||||||
|
{{- message['content'] }}
|
||||||
|
{%- elif message['content'] is iterable %}
|
||||||
|
{{- message['content'][0]['text'] }}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{%- if message['tool_calls'] is defined and message['tool_calls'] is not none %}
|
||||||
|
{%- for tool in message['tool_calls'] %}
|
||||||
|
{%- set arguments = tool['function']['arguments'] %}
|
||||||
|
{%- if arguments is not string %}
|
||||||
|
{%- set arguments = arguments|tojson %}
|
||||||
|
{%- endif %}
|
||||||
|
{{- "[TOOL_CALLS]" + tool['function']['name'] + "[ARGS]" + arguments }}
|
||||||
|
{%- endfor %}
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{{- eos_token }}
|
||||||
|
|
||||||
|
{%- elif message["role"] == "tool_results" or message["role"] == "tool" %}
|
||||||
|
{%- if message.content is defined and message.content.content is defined %}
|
||||||
|
{%- set content = message.content.content %}
|
||||||
|
{%- else %}
|
||||||
|
{%- set content = message.content %}
|
||||||
|
{%- endif %}
|
||||||
|
{{- "[TOOL_RESULTS]" + content|string + "[/TOOL_RESULTS]" }}
|
||||||
|
|
||||||
|
{%- else %}
|
||||||
|
{{- raise_exception('Only user, system, assistant and tool roles are supported!') }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endfor %}
|
||||||
|
|
||||||
|
{#- Licensed under the Apache License, Version 2.0 (the "License") #}
|
||||||
27
config.json
Normal file
27
config.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"MistralForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"head_dim": 128,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 5120,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 32768,
|
||||||
|
"max_position_embeddings": 131072,
|
||||||
|
"model_type": "mistral",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 40,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pad_token_id": 11,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_theta": 1000000000.0,
|
||||||
|
"sliding_window": null,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 131078
|
||||||
|
}
|
||||||
135
mergekit_config.yml
Normal file
135
mergekit_config.yml
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
architecture: MistralForCausalLM
|
||||||
|
models:
|
||||||
|
## BASE ##
|
||||||
|
- model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
|
||||||
|
## 2501 ##
|
||||||
|
- model: B:\24B\!models--ReadyArt--4.2.0-Broken-Tutu-24b
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--ReadyArt--Broken-Tutu-24B-Transgression-v2.0
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\PrivateMerge29 # This merge is no longer available on HF
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Nabbers1999--MS-24B-Bathory-GRPO
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--dphn--Dolphin-Mistral-24B-Venice-Edition
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TroyDoesAI--BlackSheep-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--aixonlab--Eurydice-24b-v3.5
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Undi95--MistralThinker-v1.1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2503 ##
|
||||||
|
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2506 ##
|
||||||
|
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v2-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
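weight: 0.09 # NOTE(review): duplicate "weight" key; every other entry has "density: 0.9" here, so this is likely a typo. Kept as-is because it records the config that was actually used; confirm.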
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--trashpanda-org--MS3.2-24B-Mullein-v2
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--allura-forge--ms32-final-TEXTONLY
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--CrucibleLab--M3.2-24B-Loki-V1.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--Darkhn--M3.2-24B-Animus-V7.1
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\MuXodious--Hearthfire-24B-absolute-heresy
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--ReadyArt--Dark-Nexus-24B-v2.0
|
||||||
|
parameters:
|
||||||
|
weight: 0.1
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
## 2509 ##
|
||||||
|
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\llmfan46--MS3.2-PaintedFantasy-v4.1-24B-ultra-uncensored-heretic-v1
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\!models--zerofata--MS3.2-PaintedFantasy-v3-24B
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
- model: B:\24B\MuXodious--Tiamat-24B-Magistral-PaperWitch-heresy\textonly
|
||||||
|
parameters:
|
||||||
|
weight: 0.2
|
||||||
|
density: 0.9
|
||||||
|
epsilon: 0.09
|
||||||
|
merge_method: della
|
||||||
|
base_model: B:\24B\Darkhn--Magistral-2509-24B-Text-Only
|
||||||
|
parameters:
|
||||||
|
lambda: 1.0
|
||||||
|
normalize: false
|
||||||
|
int8_mask: false
|
||||||
|
rescale: true
|
||||||
|
tokenizer:
|
||||||
|
source: union
|
||||||
|
dtype: float32
|
||||||
|
out_dtype: bfloat16
|
||||||
|
name: C82
|
||||||
3
model-00001-of-00010.safetensors
Normal file
3
model-00001-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:805ec7f4d6f6dde9cb8231997b7d65a8167d5cb32896910989a3f2ac5a7b57f0
|
||||||
|
size 4907512192
|
||||||
3
model-00002-of-00010.safetensors
Normal file
3
model-00002-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1a38133c55c8092da1bea125c56e01ea36a0ed820d7a4cba8ecb345b430e95a9
|
||||||
|
size 4781592832
|
||||||
3
model-00003-of-00010.safetensors
Normal file
3
model-00003-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2b1b235607963d595323f7d85031bda091758762162cf4139e958aa56d445962
|
||||||
|
size 4781592816
|
||||||
3
model-00004-of-00010.safetensors
Normal file
3
model-00004-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3d4e0152f669181d4de331c79ccc0b3012530f94c420fff2a280d39ad64a15da
|
||||||
|
size 4886471592
|
||||||
3
model-00005-of-00010.safetensors
Normal file
3
model-00005-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d5dc389bc0c9db19e2c1310df210c1b5e93511e8164ba1fb643a5acf84f05e8e
|
||||||
|
size 4781592832
|
||||||
3
model-00006-of-00010.safetensors
Normal file
3
model-00006-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ba33b8a665eebc602270513ec31f7e3b88d0c9092708acee450084d867b234a7
|
||||||
|
size 4781592816
|
||||||
3
model-00007-of-00010.safetensors
Normal file
3
model-00007-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1425b4386fb86555348f59efd12ad1913e79fec128217e06748b7b156472a6d3
|
||||||
|
size 4886471592
|
||||||
3
model-00008-of-00010.safetensors
Normal file
3
model-00008-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:25b9b8e698055133e1b6472c1f856bd9cdd209c2ebd711ec694f8d3ce4d0d1a0
|
||||||
|
size 4781592832
|
||||||
3
model-00009-of-00010.safetensors
Normal file
3
model-00009-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2e7f04a7ec63086da6ff1baf46ee7a22e1ba2877c6863e125e0f3c51dd451815
|
||||||
|
size 4781592800
|
||||||
3
model-00010-of-00010.safetensors
Normal file
3
model-00010-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a4e7b274eca8054c784f777a428dcac9fb1ff03203c8760802aee1e4d84c673b
|
||||||
|
size 3774959456
|
||||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 47144929280,
|
||||||
|
"mergekit_version": "0.1.4"
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.34.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||||
|
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.38.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||||
|
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||||
|
"model.norm.weight": "model-00010-of-00010.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
1032
special_tokens_map.json
Normal file
1032
special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:439f2cd468feda92550d47e24b9d47512941082d9558561bc6f92a83ee493b43
|
||||||
|
size 17079133
|
||||||
9069
tokenizer_config.json
Normal file
9069
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user