初始化项目,由ModelHub XC社区提供模型

Model: Ateron/Sketch-Cydonia
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-06 00:18:21 +08:00
commit 4ffa9f6e2b
20 changed files with 10638 additions and 0 deletions

36
.gitattributes vendored Normal file
View File

@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

43
README.md Normal file
View File

@@ -0,0 +1,43 @@
---
base_model:
- TheDrummer/Cydonia-24B-v4.3
- zerofata/MS3.2-PaintedFantasy-v4.1-24B
library_name: transformers
tags:
- mergekit
- merge
- roleplay
language:
- en
pipeline_tag: text-generation
---
# Sketch-Cydonia
![00118-282965708](https://cdn-uploads.huggingface.co/production/uploads/6390f888de25f9eda571b0f2/DDyVSwrYUqRKvd1BNbNec.png)
A sketch is something simple: a freehand drawing that is supposed to show an idea.
So with that in mind, I made this simple merge between two of the most interesting models in the 24B range.
### Configuration
The following YAML configuration was used to produce this model:
```yaml
models:
- model: E:\AI\Cydonia 4.3
parameters:
density: [1.0, 0.75, 0.5, 0.2]
weight: 1.0
- model: E:\AI\Painted Fantasy 4.1
parameters:
density: 0.35
weight: [0, 0.1, 0.35, 0.5]
merge_method: ties
base_model: E:\AI\Cydonia 4.3
parameters:
normalize: true
int8_mask: true
dtype: float32
out_dtype: bfloat16
```

51
chat_template.jinja Normal file
View File

@@ -0,0 +1,51 @@
{#- Chat template: renders the BOS token, one [SYSTEM_PROMPT] block, then every remaining message in order. #}
{#- strftime_now and raise_exception are not defined in this file; they must be supplied by the rendering environment. #}
{#- today is the current date formatted as YYYY-MM-DD; it is interpolated into the default system prompt below. #}
{%- set today = strftime_now("%Y-%m-%d") %}
{#- Fallback system prompt, used only when the first message does not have the system role. #}
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
{{- bos_token }}
{#- Pick the system prompt: a leading system message is consumed here and removed from the messages looped over below. #}
{%- if messages[0]['role'] == 'system' %}
{%- if messages[0]['content'] is string %}
{%- set system_message = messages[0]['content'] %}
{%- else %}
{#- Non-string content is indexed as a list of blocks; only the 'text' of the first block is used. #}
{%- set system_message = messages[0]['content'][0]['text'] %}
{%- endif %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = default_system_message %}
{%- set loop_messages = messages %}
{%- endif %}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
{#- Render the rest of the conversation, dispatching on each message's role. #}
{%- for message in loop_messages %}
{%- if message['role'] == 'user' %}
{#- User turns are wrapped in [INST] ... [/INST]. #}
{%- if message['content'] is string %}
{{- '[INST]' + message['content'] + '[/INST]' }}
{%- else %}
{{- '[INST]' }}
{#- Block content: text blocks are emitted as-is, image and image_url blocks become an [IMG] marker, any other type raises. #}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{{- block['text'] }}
{%- elif block['type'] in ['image', 'image_url'] %}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Only text and image blocks are supported in message content!') }}
{%- endif %}
{%- endfor %}
{{- '[/INST]' }}
{%- endif %}
{%- elif message['role'] == 'system' %}
{#- A system message after the first position is rendered as an additional [SYSTEM_PROMPT] block at that point. #}
{%- if message['content'] is string %}
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
{%- else %}
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
{%- endif %}
{%- elif message['role'] == 'assistant' %}
{#- Assistant turns are emitted without a wrapper and are terminated by the EOS token. #}
{%- if message['content'] is string %}
{{- message['content'] + eos_token }}
{%- else %}
{#- As with system messages, only the first block's 'text' is used for non-string content. #}
{{- message['content'][0]['text'] + eos_token }}
{%- endif %}
{%- else %}
{#- Any role other than user, system or assistant is rejected. #}
{{- raise_exception('Only user, system and assistant roles are supported!') }}
{%- endif %}
{%- endfor %}

27
config.json Normal file
View File

@@ -0,0 +1,27 @@
{
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 5120,
"initializer_range": 0.02,
"intermediate_size": 32768,
"max_position_embeddings": 131072,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 40,
"num_key_value_heads": 8,
"pad_token_id": 11,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"transformers_version": "4.57.6",
"use_cache": false,
"vocab_size": 131072
}

10
generation_config.json Normal file
View File

@@ -0,0 +1,10 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"do_sample": true,
"eos_token_id": [
2
],
"pad_token_id": 11,
"transformers_version": "4.57.1"
}

16
mergekit_config.yml Normal file
View File

@@ -0,0 +1,16 @@
models:
- model: E:\AI\Cydonia 4.3
parameters:
density: [1.0, 0.75, 0.5, 0.2]
weight: 1.0
- model: E:\AI\Painted Fantasy 4.1
parameters:
density: 0.35
weight: [0, 0.1, 0.35, 0.5]
merge_method: ties
base_model: E:\AI\Cydonia 4.3
parameters:
normalize: true
int8_mask: true
dtype: float32
out_dtype: bfloat16

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e6aba1e8787c93dff424ecc2f82dfc0f33ebc7d74a5e674acdc62e819ebd5e4
size 4907389312

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08881a76780d905a31f565e34525a6deefc1141c3a1766c45da8a5cda4968132
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c28a89d613c78636ab6e9f31c2bea39565b817572eb4085b29ba5b28ca5b65f
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:68d9a2660adc9cbfda19cdd2f18e50fd880bc33684b34369c6182b8b16c8975a
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d7484799d50c789a46f1386b62302d65be317a9db6eabd1f55bf4260230e7435
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:beb320d0c4b58d4ba9cc7f214e204febea05b1e93c26e8422c5546ac9e6212d2
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:677da5c304dd13d45fafde106b4fda7c1100e31d731a05a3c5e2541afc7b6aa4
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25d86573a20466eaf7243f645bd21271b00d93bb095eec2d01fddd7e72a6303f
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dc3c8f2d86c547f4753efe09c6aba80895af59d27a48d7ba92ec7a79d4366ff3
size 4781592800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cbca06204833a170611bc9e33bdb5d7fdf819455a97d0c7f4c3eaef371d8e4ae
size 3774959456

View File

@@ -0,0 +1,371 @@
{
"metadata": {
"total_size": 47144806400,
"mergekit_version": "0.1.4"
},
"weight_map": {
"lm_head.weight": "model-00001-of-00010.safetensors",
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.22.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.34.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.norm.weight": "model-00010-of-00010.safetensors"
}
}

1032
special_tokens_map.json Normal file

File diff suppressed because it is too large Load Diff

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b76085f9923309d873994d444989f7eb6ec074b06f25b58f1e8d7b7741070949
size 17078037

9019
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff