初始化项目,由ModelHub XC社区提供模型
Model: dicta-il/DictaLM-3.0-24B-Thinking Source: Original Platform
This commit is contained in:
37
.gitattributes
vendored
Normal file
37
.gitattributes
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
tekken.json filter=lfs diff=lfs merge=lfs -text
|
||||
102
README.md
Normal file
102
README.md
Normal file
@@ -0,0 +1,102 @@
|
||||
---
|
||||
license: apache-2.0
|
||||
pipeline_tag: text-generation
|
||||
language:
|
||||
- en
|
||||
- he
|
||||
tags:
|
||||
- pretrained
|
||||
inference:
|
||||
parameters:
|
||||
temperature: 0.6
|
||||
---
|
||||
|
||||
[<img src="https://i.ibb.co/5Lbwyr1/dicta-logo.jpg" width="300px"/>](https://dicta.org.il)
|
||||
|
||||
# Dicta-LM 3.0: Advancing The Frontier of Hebrew Sovereign LLMs
|
||||
|
||||
Dicta-LM 3.0 is a powerful open-weight collection of LLMs, trained on extensive corpora of Hebrew and English texts. The models are available for download and for unlimited use. The models set a new SOTA for their weight-class for Hebrew, both as base models and chat models.
|
||||
|
||||
This is our flagship model, a 24-billion-parameter *reasoning* model, with full precision (BF16), originally initialized from [Mistral-Small-3.1-24B-Base-2503](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503).
|
||||
|
||||
This model is a reasoning chat model, which means that before responding to any given message from the user, the model first thinks out the right way to respond in a designated thinking block.
|
||||
|
||||
<br/>
|
||||
|
||||
🚀 Try it out here: [chat.dicta.org.il](https://chat.dicta.org.il)
|
||||
|
||||
<br/>
|
||||
|
||||
For full details of this model please read our [release blog post](https://dicta.org.il/dicta-lm-3) or the [technical report](https://www.dicta.org.il/publications/DictaLM_3_0___Techincal_Report.pdf).
|
||||
|
||||
You can view and access the full collection of base/instruct unquantized/quantized versions of `DictaLM 3.0` [here](https://huggingface.co/collections/dicta-il/dictalm-30-collection).
|
||||
|
||||
## Instruction format
|
||||
|
||||
In order to leverage instruction fine-tuning, your prompt should be rendered using the chat template specified for this model. Most libraries deal with this automatically, so you can just let them do it.
|
||||
|
||||
## Usage
|
||||
|
||||
We recommend using vLLM, but you can use Transformers as well:
|
||||
|
||||
### Transformers
|
||||
|
||||
```python
|
||||
|
||||
from transformers import pipeline
|
||||
|
||||
generator = pipeline('text-generation', model="dicta-il/DictaLM-3.0-24B-Thinking")
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "איזה רוטב אהוב עליך?"},
|
||||
{"role": "assistant", "content": "טוב, אני די מחבב כמה טיפות מיץ לימון סחוט טרי. זה מוסיף בדיוק את הכמות הנכונה של טעם חמצמץ לכל מה שאני מבשל במטבח!"},
|
||||
{"role": "user", "content": "האם יש לך מתכונים למיונז?"}
|
||||
]
|
||||
|
||||
print(generator(messages)[0]['generated_text'][-1]) # just print the last message
|
||||
# {'role': 'assistant', 'content': 'המשתמש שואל: "האם יש לך מתכונים למיונז?" עלינו להגיב עם מתכון למיונז. ספקו מתכון קלאסי, אולי גם וריאציות (מיונז פליאו, מיונז טבעוני, מיונז חריף, וכו\'). ספקו הוראות שלב אחר שלב. צריך להיות בטון ידידותי.</think>בטח! מיונז ביתי הוא באמת פשוט להכנה ומעניק לכם שליטה מלאה על המרכיבים, הטעם והמרקם. להלן המתכון הקלאסי, בתוספת כמה וריאציות מהנות שתוכלו להתנסות בהן.\n\n---\n\n## 1️⃣ מיונז קלאסי (מיונז בסיסי)\n\n| מרכיב | כמות |\n|------------|----------|\n| חלמון ביצה גדול (או 2 חלמונים בינוניים) | 1 |\n| חרדל דיז\'ון | 1 כפית |\n| מיץ לימון טרי (או ח'}
|
||||
```
|
||||
|
||||
### vLLM
|
||||
|
||||
```bash
|
||||
vllm serve dicta-il/DictaLM-3.0-24B-Thinking --enable-auto-tool-choice --tool-call-parser hermes --reasoning_parser deepseek_r1
|
||||
```
|
||||
|
||||
And then you can access it via the openai library:
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
base_url="http://localhost:8000/v1",
|
||||
api_key="sk-no-key-required"
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="dicta-il/DictaLM-3.0-24B-Thinking",
|
||||
messages=[
|
||||
{"role": "user", "content": "Hello, how are you?"}
|
||||
],
|
||||
)
|
||||
|
||||
print(response.choices[0].message.content)
|
||||
```
|
||||
|
||||
> The reasoning traces should be available in the response structure in the designated field.
|
||||
|
||||
The model supports tool-calling, enabling integration with external tools and APIs. For an example of how to use tool calling, see the [vLLM documentation](https://docs.vllm.ai/en/stable/features/tool_calling/#tool-calling).
|
||||
|
||||
## Citation
|
||||
|
||||
If you use this model, please cite:
|
||||
|
||||
```bibtex
|
||||
@article{Shmidman2025DictaLM3,
|
||||
title={{Dicta-LM 3.0: Advancing The Frontier of Hebrew Sovereign LLMs}},
|
||||
author={Shaltiel Shmidman and Avi Shmidman and Amir DN Cohen and Moshe Koppel},
|
||||
year={2025},
|
||||
publisher={{DICTA / Jerusalem, Israel}},
|
||||
note={https://www.dicta.org.il/publications/DictaLM_3_0___Techincal_Report.pdf}
|
||||
}
|
||||
```
|
||||
92
chat_template.jinja
Normal file
92
chat_template.jinja
Normal file
@@ -0,0 +1,92 @@
|
||||
{# ───── header (system message) ───── #}
|
||||
{{- "<|im_start|>system\n" -}}
|
||||
{# ───── get the custom instructions + optional thinking override ───── #}
|
||||
{%- if messages[0].role == "system" -%}
|
||||
{%- set custom_instructions = messages[0].content.rstrip() -%}
|
||||
{%- endif -%}
|
||||
{# ───── set the system prompt ───── #}
|
||||
{%- if custom_instructions -%}
|
||||
{{- custom_instructions -}}
|
||||
{%- else -%}
|
||||
{{- "You are a helpful AI assistant named Dicta-LM 3.0, Trained by Dicta, the Israel Center for Text Analysis. Your role is to provide accurate, helpful, and well-structured responses to user questions and requests.\nProvide clear, logical, and precise answers that thoroughly address what the user is asking for. Structure your responses in a way that is easy to understand and follow. You are Dicta-LM 3.0." -}}
|
||||
{%- endif -%}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages | length - 1) -%}
|
||||
{%- for message in messages[::-1] -%}
|
||||
{%- set index = messages | length - 1 - loop.index0 -%}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not (message.content.startswith("<tool_response>") and message.content.endswith("</tool_response>")) -%}
|
||||
{%- set ns.multi_step_tool = false -%}
|
||||
{%- set ns.last_query_index = index -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- if tools -%}
|
||||
{{- "\n\n## Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" -}}
|
||||
{%- for tool in tools -%}
|
||||
{{- "\n" -}}
|
||||
{{- tool | tojson -}}
|
||||
{%- endfor -%}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" -}}
|
||||
{%- endif -%}
|
||||
{{- "<|im_end|>" -}}
|
||||
{%- for message in messages -%}
|
||||
{%- if message.content is string -%}
|
||||
{%- set content = message.content -%}
|
||||
{%- else -%}
|
||||
{%- set content = "" -%}
|
||||
{%- endif -%}
|
||||
{%- if message.role == "user" or message.role == "system" and not loop.first -%}
|
||||
{{- "\n<|im_start|>" + message.role + "\n" + content + "<|im_end|>" -}}
|
||||
{%- elif message.role == "assistant" -%}
|
||||
{% generation %}
|
||||
{%- set reasoning_content = "" -%}
|
||||
{%- if message.reasoning_content is string -%}
|
||||
{%- set reasoning_content = message.reasoning_content -%}
|
||||
{%- elif "</think>" in content -%}
|
||||
{%- set reasoning_content = content.split("</think>")[0].rstrip("\n").split("<think>")[-1].lstrip("\n") -%}
|
||||
{%- set content = content.split("</think>")[-1].lstrip("\n") -%}
|
||||
{%- endif -%}
|
||||
{%- if loop.index0 > ns.last_query_index -%}
|
||||
{%- if loop.last or not loop.last and reasoning_content -%}
|
||||
{{- "\n<|im_start|>" + message.role + "\n<think>" + reasoning_content.strip("\n") + "</think>" + content.lstrip("\n") -}}
|
||||
{%- else -%}
|
||||
{{- "\n<|im_start|>" + message.role + "\n" + content -}}
|
||||
{%- endif -%}
|
||||
{%- else -%}
|
||||
{{- "\n<|im_start|>" + message.role + "\n" + content -}}
|
||||
{%- endif -%}
|
||||
{%- if message.tool_calls -%}
|
||||
{%- for tool_call in message.tool_calls -%}
|
||||
{%- if loop.first and content or not loop.first -%}
|
||||
{{- "\n" -}}
|
||||
{%- endif -%}
|
||||
{%- if tool_call.function -%}
|
||||
{%- set tool_call = tool_call.function -%}
|
||||
{%- endif -%}
|
||||
{{- "<tool_call>\n{\"name\": \"" -}}
|
||||
{{- tool_call.name -}}
|
||||
{{- "\", \"arguments\": " -}}
|
||||
{%- if tool_call.arguments is string -%}
|
||||
{{- tool_call.arguments -}}
|
||||
{%- else -%}
|
||||
{{- tool_call.arguments | tojson -}}
|
||||
{%- endif -%}
|
||||
{{- "}\n</tool_call>" -}}
|
||||
{%- endfor -%}
|
||||
{%- endif -%}
|
||||
{{- "<|im_end|>" -}}
|
||||
{% endgeneration %}
|
||||
{%- elif message.role == "tool" -%}
|
||||
{%- if loop.first or messages[loop.index0 - 1].role != "tool" -%}
|
||||
{{- "\n<|im_start|>user" -}}
|
||||
{%- endif -%}
|
||||
{{- "\n<tool_response>\n" -}}
|
||||
{{- content -}}
|
||||
{{- "\n</tool_response>" -}}
|
||||
{%- if loop.last or messages[loop.index0 + 1].role != "tool" -%}
|
||||
{{- "<|im_end|>" -}}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{{- "\n" -}}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{- "<|im_start|>assistant\n<think>" -}}
|
||||
{%- endif -%}
|
||||
26
config.json
Normal file
26
config.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 32768,
|
||||
"max_position_embeddings": 65280,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 40,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.55.4",
|
||||
"use_cache": true,
|
||||
"vocab_size": 131072
|
||||
}
|
||||
15
generation_config.json
Normal file
15
generation_config.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
2,
|
||||
21
|
||||
],
|
||||
"pad_token_id": 0,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"frequency_penalty": 0.1,
|
||||
"transformers_version": "4.55.4"
|
||||
}
|
||||
3
model-00001-of-00010.safetensors
Normal file
3
model-00001-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2cb33a03faea56a45215cdf485c79277da4e4635c16ee3d451f70fd5c3338412
|
||||
size 4781571704
|
||||
3
model-00002-of-00010.safetensors
Normal file
3
model-00002-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1003ad729814966f869c71d31b49af116ba08043274c06d52974719eb04f473f
|
||||
size 4781592752
|
||||
3
model-00003-of-00010.safetensors
Normal file
3
model-00003-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e70fdb86ddb36192a6c8a256a2aa2c460c5d87f13d02830b26658e944990dd6d
|
||||
size 4781592768
|
||||
3
model-00004-of-00010.safetensors
Normal file
3
model-00004-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e40e692443c1fb02488ba2538654f4d9f16dd198d3503c1c1377809b1699adb4
|
||||
size 4886471568
|
||||
3
model-00005-of-00010.safetensors
Normal file
3
model-00005-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b8121d7dfc479030aeabac8268a1cf4a48bc24268425c307920251548a823933
|
||||
size 4781592792
|
||||
3
model-00006-of-00010.safetensors
Normal file
3
model-00006-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ba6d484cd95642072b0397b27e90262da0d3716015ac64a8a8ba367f0e11e71f
|
||||
size 4781592784
|
||||
3
model-00007-of-00010.safetensors
Normal file
3
model-00007-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:68c512829b60ff4928dd78ee047c6aadb50de666dc565b805d70a22dd9d9945b
|
||||
size 4886471568
|
||||
3
model-00008-of-00010.safetensors
Normal file
3
model-00008-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:28ce4afe5714f4c8411c6dc0c7dca9a2feaf7549aff8e752eda68cf5cd41f0eb
|
||||
size 4781592792
|
||||
3
model-00009-of-00010.safetensors
Normal file
3
model-00009-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:33f9036c503127e9cc18836c0e00c4e6b1dcb98cb5626dccba7380b66b54f755
|
||||
size 4781592784
|
||||
3
model-00010-of-00010.safetensors
Normal file
3
model-00010-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9328c2f412f85a416163d43f01748880116279d86dc1b87bd5720e4b74d655fc
|
||||
size 3900777040
|
||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 23572403200,
|
||||
"total_size": 47144806400
|
||||
},
|
||||
"weight_map": {
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"lm_head.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.norm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00010-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00010-of-00010.safetensors"
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d54d32258647e5a4e66e7a211bd9a8794674666b4ebb049ccf851161a9e489cd
|
||||
size 17078033
|
||||
9014
tokenizer_config.json
Normal file
9014
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user