初始化项目,由ModelHub XC社区提供模型

Model: 24B-Suite/Mergedonia-KARCHER-24B-v1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-26 16:34:16 +08:00
commit b275b1d4e6
19 changed files with 10650 additions and 0 deletions

36
.gitattributes vendored Normal file
View File

@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

34
README.md Normal file
View File

@@ -0,0 +1,34 @@
---
license: apache-2.0
base_model:
- Naphula/BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
- TheDrummer/Cydonia-24B-v4.3
- TheDrummer/Magidonia-24B-v4.3
- TheDrummer/Precog-24B-v1
- TheDrummer/Rivermind-24B-v1
language:
- en
library_name: transformers
tags:
- mergekit
- merge
- mistral
---
```yaml
architecture: MistralForCausalLM
models:
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
merge_method: karcher
dtype: float32
out_dtype: bfloat16
parameters:
tol: 1.0e-9
max_iter: 1000
tokenizer_source: union
name: Mergedonia-Karcher-24B-v1
```

51
chat_template.jinja Normal file
View File

@@ -0,0 +1,51 @@
{%- set today = strftime_now("%Y-%m-%d") %}
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
{{- bos_token }}
{%- if messages[0]['role'] == 'system' %}
{%- if messages[0]['content'] is string %}
{%- set system_message = messages[0]['content'] %}
{%- else %}
{%- set system_message = messages[0]['content'][0]['text'] %}
{%- endif %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set system_message = default_system_message %}
{%- set loop_messages = messages %}
{%- endif %}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
{%- for message in loop_messages %}
{%- if message['role'] == 'user' %}
{%- if message['content'] is string %}
{{- '[INST]' + message['content'] + '[/INST]' }}
{%- else %}
{{- '[INST]' }}
{%- for block in message['content'] %}
{%- if block['type'] == 'text' %}
{{- block['text'] }}
{%- elif block['type'] in ['image', 'image_url'] %}
{{- '[IMG]' }}
{%- else %}
{{- raise_exception('Only text and image blocks are supported in message content!') }}
{%- endif %}
{%- endfor %}
{{- '[/INST]' }}
{%- endif %}
{%- elif message['role'] == 'system' %}
{%- if message['content'] is string %}
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
{%- else %}
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
{%- endif %}
{%- elif message['role'] == 'assistant' %}
{%- if message['content'] is string %}
{{- message['content'] + eos_token }}
{%- else %}
{{- message['content'][0]['text'] + eos_token }}
{%- endif %}
{%- else %}
{{- raise_exception('Only user, system and assistant roles are supported!') }}
{%- endif %}
{%- endfor %}

27
config.json Normal file
View File

@@ -0,0 +1,27 @@
{
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 5120,
"initializer_range": 0.02,
"intermediate_size": 32768,
"max_position_embeddings": 131072,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 40,
"num_key_value_heads": 8,
"pad_token_id": 11,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"transformers_version": "4.56.1",
"use_cache": false,
"vocab_size": 131076
}

15
mergekit_config.yml Normal file
View File

@@ -0,0 +1,15 @@
architecture: MistralForCausalLM
models:
- model: B:\24B\!BeaverAI_Fallen-Mistral-Small-3.1-24B-v1e_textonly
- model: B:\24B\!models--TheDrummer--Magidonia-24B-v4.3
- model: B:\24B\!models--TheDrummer--Precog-24B-v1
- model: B:\24B\!models--TheDrummer--Rivermind-24B-v1
- model: B:\24B\!models--TheDrummer--Cydonia-24B-v4.3
merge_method: karcher
dtype: float32
out_dtype: bfloat16
parameters:
tol: 1.0e-9
max_iter: 1000
tokenizer_source: union
name: Mergedonia-Karcher

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ccd5e6f0934f9cf23fad2de5782e4542fa34676aebb2666a2ef4a0bf5dbc650
size 4907471232

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0c5659ed1e3f82eb7f55f729c6a4bf86149332188335854027b4c16b533a310a
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bfa5880f7866d4fcf9f002034da3b384494f4cbd2222348e3e4c012a229bbc5b
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5afe45a833a636d5b68beb6939f4b7591bf3d5e36ac9057788df8620ff1b9cf0
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d704ec8d176c0f4a9bf0d56558967a23a899125b962faf4cbde79987553fe856
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:390a7860418da2c74c7096c3a41cb4f41ec50d050b331763ca1d3fa4c7bc992f
size 4781592816

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73e013fa9e91137274fb19461cdc556f1aee96a3f20f20255cc37d047bc73680
size 4886471592

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1fd7acd2b1382b0e8c43a7f1caed29f7a3c0c17339c0ee1f63eaf939e1acbc57
size 4781592832

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:090e8c13aaeee3790dd9a3d2ec1ee45fb93d44916e1736691fdfba2107313ce9
size 4781592800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e61addeda39b15e0ddaf321c28594f1da41e1145015a36a0274c70d9aa7a868
size 3774959456

View File

@@ -0,0 +1,371 @@
{
"metadata": {
"total_size": 47144888320,
"mergekit_version": "0.1.4"
},
"weight_map": {
"lm_head.weight": "model-00001-of-00010.safetensors",
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
"model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.input_layernorm.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
"model.layers.22.input_layernorm.weight": "model-00004-of-00010.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.input_layernorm.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.input_layernorm.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
"model.layers.34.input_layernorm.weight": "model-00007-of-00010.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.input_layernorm.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.input_layernorm.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.input_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
"model.norm.weight": "model-00010-of-00010.safetensors"
}
}

1032
special_tokens_map.json Normal file

File diff suppressed because it is too large Load Diff

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:05230ffd53b91df6d926b85fd13c0899a5535901d87d063fea07e31871bfd6d7
size 17078775

9051
tokenizer_config.json Normal file

File diff suppressed because it is too large Load Diff