初始化项目,由ModelHub XC社区提供模型
Model: USS-Inferprise/Dark-Cydonian-Wind-24B Source: Original Platform
This commit is contained in:
37
.gitattributes
vendored
Normal file
37
.gitattributes
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
dcw.png filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
51
README.md
Normal file
51
README.md
Normal file
@@ -0,0 +1,51 @@
|
||||
---
|
||||
license: mit
|
||||
base_model:
|
||||
- TheDrummer/Cydonia-24B-v4.3
|
||||
- ReadyArt/Omega-Darker_The-Final-Directive-24B
|
||||
- SicariusSicariiStuff/Redemption_Wind_24B
|
||||
tags:
|
||||
- merge
|
||||
- mergekit
|
||||
- mistral
|
||||
- roleplay
|
||||
- creative
|
||||
- NSFW
|
||||
language:
|
||||
- en
|
||||
pipeline_tag: text-generation
|
||||
---
|
||||
|
||||
# DARK CYDONIAN WIND (BF16)
|
||||
|
||||

|
||||
|
||||
Dark Cydonian Wind is a TIES merge of three Mistral-Small (24B) derivatives.
|
||||
|
||||
### The Recipe (TIES Merge)
|
||||
|
||||
The weights were balanced to maintain the narrative quality of Cydonia and Redemption Wind while adding a dash of spiciness from Omega Darker.
|
||||
|
||||
| Model | Weight | Role |
|
||||
| :--- | :--- | :--- |
|
||||
| **Cydonia-24B-v4.3** | 45% | Core backbone and general intelligence. |
|
||||
| **Omega-Darker_The-Final-Directive-24B** | 30% | Spiciness |
|
||||
| **Redemption Wind** | 25% | Reining in Omega Darker, unsloppiness and stability |
|
||||
|
||||
### 🚀 Key Improvements
|
||||
* **Large Context Ready:** Supports 48K-64K context windows.
|
||||
* **Smart 24B Architecture:** When quantized, fits on 24GB VRAM cards (and on 16GB VRAM cards at Q4_K_M with a reduced context window). The BF16 weights in this repository total roughly 47 GB.
|
||||
* **A Little Spice:** This model has just enough edge to create a worthwhile villain in creative writing without crossing the line into utter depravity. It should be able to make your evil sorcerer monologue to the captured knight in his dungeon without lecturing you on the immorality of kidnapping public officials.
|
||||
* **Less Slop:** While some slop remains, much of the slop from the parent models (looking at you here, Omega Darker) appears to have been eliminated in the TIES merge process.
|
||||
|
||||
|
||||
### 🔧 Usage
|
||||
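**Prompt Template:** Mistral-Small / Instruct. Note that the bundled `chat_template.jinja` emits the system prompt as `[SYSTEM_PROMPT]…[/SYSTEM_PROMPT]` before the first `[INST]` turn, which differs from the simplified form shown below.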
|
||||
```text
|
||||
[INST] {{system_prompt}}
|
||||
|
||||
{{user_prompt}} [/INST]
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> **This model incorporates models that have had refusal vectors weakened or removed. It may produce content unsuitable for minors or other vulnerable people. Caution is advised.**
|
||||
51
chat_template.jinja
Normal file
51
chat_template.jinja
Normal file
@@ -0,0 +1,51 @@
|
||||
{#- Preamble: record today's date and build the fallback system prompt that is used when the conversation does not open with a system message. #}
{%- set today = strftime_now("%Y-%m-%d") %}
|
||||
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
|
||||
|
||||
{#- Every rendered prompt starts with the BOS token. #}
{{- bos_token }}
|
||||
|
||||
{#- Choose the system prompt. If the first message has role 'system', use its content and iterate over the remaining messages; otherwise fall back to default_system_message and iterate over all messages. #}
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{%- set system_message = messages[0]['content'] %}
|
||||
{%- else %}
|
||||
{#- Non-string content: only the 'text' field of the first block is used. #}
{%- set system_message = messages[0]['content'][0]['text'] %}
|
||||
{%- endif %}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = default_system_message %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- endif %}
|
||||
{#- The system prompt is always emitted, wrapped in [SYSTEM_PROMPT] markers. #}
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
||||
|
||||
{#- Render the remaining turns in order: user turns are wrapped in [INST]...[/INST], system turns in [SYSTEM_PROMPT]...[/SYSTEM_PROMPT], and assistant turns are emitted followed by the EOS token. Any other role raises an exception. #}
{%- for message in loop_messages %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||
{%- else %}
|
||||
{#- Structured content: concatenate text blocks, emit one [IMG] placeholder per image block, and reject any other block type. #}
{{- '[INST]' }}
|
||||
{%- for block in message['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] in ['image', 'image_url'] %}
|
||||
{{- '[IMG]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- '[/INST]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'system' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- else %}
|
||||
{#- Non-string system content: only the 'text' field of the first block is used. #}
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] + eos_token }}
|
||||
{%- else %}
|
||||
{#- Non-string assistant content: only the 'text' field of the first block is used. #}
{{- message['content'][0]['text'] + eos_token }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only user, system and assistant roles are supported!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 32768,
|
||||
"max_position_embeddings": 131072,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 40,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 11,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000000.0,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "5.5.3",
|
||||
"use_cache": false,
|
||||
"vocab_size": 131072
|
||||
}
|
||||
3
dcw.png
Normal file
3
dcw.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:52fba487209030dc4d300025ba5f0378635e4076ba1483e48886fbd033b9af9f
|
||||
size 1103594
|
||||
3
model-00001-of-00010.safetensors
Normal file
3
model-00001-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2caaf0508ecfd403d1483d4eee5c89a78384158f5a30139162c63fb551a4dd01
|
||||
size 4907389312
|
||||
3
model-00002-of-00010.safetensors
Normal file
3
model-00002-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4a71ec912fa68dd60a6c510c3d50efb4940b3564c44f325377dc39f1cdb7e4ee
|
||||
size 4781592832
|
||||
3
model-00003-of-00010.safetensors
Normal file
3
model-00003-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7f36b126d7a97177cb810bcf93d309879747ac916dbb3fe522d8b6f444e6586d
|
||||
size 4781592816
|
||||
3
model-00004-of-00010.safetensors
Normal file
3
model-00004-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1a63f4ab32231a325422080aedab5beacbfd0ea18b9f8cd66fbd1e363f5c1b6c
|
||||
size 4886471592
|
||||
3
model-00005-of-00010.safetensors
Normal file
3
model-00005-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:24bd2ef868dc68cf020d077c5a63b811f278ec61a9e5f18c747cd40851d44fff
|
||||
size 4781592832
|
||||
3
model-00006-of-00010.safetensors
Normal file
3
model-00006-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b10c6e695bb13b5930fda1154932f7c724679e36bcf5a3fa30deb323a8ff61be
|
||||
size 4781592816
|
||||
3
model-00007-of-00010.safetensors
Normal file
3
model-00007-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:491aaaccd5c460176bcd23356afffb10a32809ee948965ff3ba98119d065bca4
|
||||
size 4886471592
|
||||
3
model-00008-of-00010.safetensors
Normal file
3
model-00008-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f50c81daaacee09bc9960023899a5a717168fd87e22a7ab54b66b27911ffe6e9
|
||||
size 4781592832
|
||||
3
model-00009-of-00010.safetensors
Normal file
3
model-00009-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d04f66fc06cc151a9c21c0bcd9a5b7bfb669ee155cfe12af016c1d7c81a3eb6f
|
||||
size 4781592800
|
||||
3
model-00010-of-00010.safetensors
Normal file
3
model-00010-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ae3e76c3d8db3f1eaff0cd23b9637d199a953a0dff1dfa76e2490deb463e3c6d
|
||||
size 3774959456
|
||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 47144806400,
|
||||
"mergekit_version": "0.1.4"
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00001-of-00010.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.norm.weight": "model-00010-of-00010.safetensors"
|
||||
}
|
||||
}
|
||||
7007
special_tokens_map.json
Normal file
7007
special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b76085f9923309d873994d444989f7eb6ec074b06f25b58f1e8d7b7741070949
|
||||
size 17078037
|
||||
9019
tokenizer_config.json
Normal file
9019
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user