初始化项目,由ModelHub XC社区提供模型
Model: lmstudio-community/Devstral-Small-2507-MLX-bf16 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
56
README.md
Normal file
56
README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
---
|
||||
language:
|
||||
- en
|
||||
- fr
|
||||
- de
|
||||
- es
|
||||
- pt
|
||||
- it
|
||||
- ja
|
||||
- ko
|
||||
- ru
|
||||
- zh
|
||||
- ar
|
||||
- fa
|
||||
- id
|
||||
- ms
|
||||
- ne
|
||||
- pl
|
||||
- ro
|
||||
- sr
|
||||
- sv
|
||||
- tr
|
||||
- uk
|
||||
- vi
|
||||
- hi
|
||||
- bn
|
||||
license: apache-2.0
|
||||
library_name: vllm
|
||||
inference: false
|
||||
base_model: mistralai/Devstral-Small-2507
|
||||
extra_gated_description: If you want to learn more about how we process your personal
|
||||
data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
|
||||
pipeline_tag: text2text-generation
|
||||
tags:
|
||||
- mlx
|
||||
---
|
||||
## 💫 Community Model> Devstral-Small-2507 by mistralai
|
||||
|
||||
*👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)*.
|
||||
|
||||
**Model creator**: [mistralai](https://huggingface.co/mistralai)<br>
|
||||
**Original model**: [Devstral-Small-2507](https://huggingface.co/mistralai/Devstral-Small-2507)<br>
|
||||
**MLX conversion**: provided by [LM Studio team](https://x.com/lmstudio) using [mlx_lm](https://github.com/ml-explore/mlx-lm)<br>
|
||||
**LM Studio model page**: [mistralai/devstral-small-2507](https://lmstudio.ai/models/mistralai/devstral-small-2507)<br>
|
||||
|
||||
## Technical Details
|
||||
|
||||
Original bfloat16 version of Devstral-Small-2507 using MLX, optimized for Apple Silicon.
|
||||
|
||||
## Special thanks
|
||||
|
||||
🙏 Special thanks to the [Apple Machine Learning Research](https://github.com/ml-explore) team for creating [MLX](https://github.com/ml-explore/mlx).
|
||||
|
||||
## Disclaimers
|
||||
|
||||
LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, virus-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
|
||||
69
chat_template.jinja
Normal file
69
chat_template.jinja
Normal file
@@ -0,0 +1,69 @@
|
||||
{%- set default_system_message = 'You are Devstral, a helpful agentic model trained by Mistral AI. When tools are made available to you, you can use them to solve tasks.\n\n<ROLE>\nAssist users by solving technical problems. Be thorough and prioritize quality over speed.\n* For questions like \"why is X happening\", provide explanations without attempting fixes.\n</ROLE>\n\n<EFFICIENCY>\n* Each action has cost. Combine multiple operations when possible.\n* Use exploration tools efficiently.\n</EFFICIENCY>\n\n<FILE_SYSTEM_GUIDELINES>\n* Don\'t assume file paths are relative to current directory. Explore first to locate files.\n* Edit files directly rather than creating new files with different names.\n* Use `sed` for global search-and-replace instead of opening editors multiple times.\n</FILE_SYSTEM_GUIDELINES>\n\n<CODE_QUALITY>\n* Write clean, efficient code with minimal comments. Avoid redundant explanations.\n* Make minimal changes to solve problems. Understand codebase through exploration first.\n* Split large functions/files when appropriate.\n</CODE_QUALITY>\n\n<VERSION_CONTROL>\n* Exercise caution with git operations. Avoid dangerous changes unless explicitly requested.\n* Use `git status` before committing. Stage necessary files and use `git commit -a` when possible.\n* Don\'t commit files that shouldn\'t be in version control (node_modules/, .env, build dirs, cache, binaries) unless instructed.\n</VERSION_CONTROL>\n\n<PROBLEM_SOLVING_WORKFLOW>\n1. EXPLORATION: Understand context before proposing solutions\n2. ANALYSIS: Consider multiple approaches, select best option\n3. TESTING: Create tests for bugs, consider test-driven development when appropriate. Consult user before extensive test infrastructure setup.\n4. IMPLEMENTATION: Make focused, minimal changes\n5. VERIFICATION: Test thoroughly if environment supports it. Consult user before setting up test environment.\n</PROBLEM_SOLVING_WORKFLOW>\n\n<SECURITY>\n* Only use credentials (GITHUB_TOKEN, etc.) 
as explicitly requested by user.\n* Use APIs unless user requests otherwise.\n</SECURITY>\n\n<ENVIRONMENT_SETUP>\n* Install missing applications when user requests to run them.\n* For missing dependencies: check for dependency files first (requirements.txt, package.json, etc.), install from those when available.\n</ENVIRONMENT_SETUP>\n\n<TROUBLESHOOTING>\n* If repeated attempts fail: identify 5-7 possible causes, assess likelihood, address systematically.\n* For major issues during plan execution: propose new plan and confirm with user before proceeding.\n</TROUBLESHOOTING>\n\nAbove tool usage guidance applies only to the tools provided in the available tools section. If there is no explicit available tools section, you do not have access to any tools.' %}
|
||||
{{- bos_token }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{%- set system_message = messages[0]['content'] %}
|
||||
{%- else %}
|
||||
{%- set system_message = messages[0]['content'][0]['text'] %}
|
||||
{%- endif %}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = default_system_message %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- endif %}
|
||||
{%- if tools %}
|
||||
{%- set system_message = system_message + '\n\n<TOOL_CALL_FORMAT>\nWhen calling tools, use the following format: [TOOL_CALLS]function_name[ARGS]{"arg1": "value1", "arg2": "value2"}[TOOL_CALLS]function_name2[ARGS]{"arg1": "value1"}...\n</TOOL_CALL_FORMAT>' %}
|
||||
{%- endif %}
|
||||
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
||||
{%- if tools %}
|
||||
{{- '[AVAILABLE_TOOLS]' }}
|
||||
{%- for tool in tools %}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- '[/AVAILABLE_TOOLS]' }}
|
||||
{%- endif %}
|
||||
{%- for message in loop_messages %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||
{%- else %}
|
||||
{{- '[INST]' }}
|
||||
{%- for block in message['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text is supported in message content!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- '[/INST]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'system' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- else %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{%- if message.get('tool_calls') %}
|
||||
{%- if message['content'] is string and message['content'] %}
|
||||
{{- message['content'] }}
|
||||
{%- elif message['content'] and message['content'][0]['text'] %}
|
||||
{{- message['content'][0]['text'] }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message['tool_calls'] %}
|
||||
{{- '[TOOL_CALLS]' + tool_call['function']['name'] + '[ARGS]' + (tool_call['function']['arguments'] | tojson) }}
|
||||
{%- endfor %}
|
||||
{{- eos_token }}
|
||||
{%- else %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] + eos_token }}
|
||||
{%- else %}
|
||||
{{- message['content'][0]['text'] + eos_token }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'tool' %}
|
||||
{{- '[TOOL_RESULTS]' + message['content'] + '[/TOOL_RESULTS]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only user, system, assistant and tool roles are supported!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
27
config.json
Normal file
27
config.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 32768,
|
||||
"max_position_embeddings": 131072,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 40,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 11,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.53.1",
|
||||
"use_cache": true,
|
||||
"vocab_size": 131072
|
||||
}
|
||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"pad_token_id": 11,
|
||||
"transformers_version": "4.53.1"
|
||||
}
|
||||
3
model-00001-of-00010.safetensors
Normal file
3
model-00001-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4c97e7429c8a770a3c2ea458b02516e75e99ec78f9105b3988ec33a9506239c8
|
||||
size 5117116159
|
||||
3
model-00002-of-00010.safetensors
Normal file
3
model-00002-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c13dca2d66a13e935350186bdc45ccd022f4c5f02b26fb60b2049d61b91e8286
|
||||
size 5222015985
|
||||
3
model-00003-of-00010.safetensors
Normal file
3
model-00003-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9a49f78c2dcefbf0c3a89829aafbb22ddad80c0ed14207d6f1922a32fa39f79b
|
||||
size 5117137230
|
||||
3
model-00004-of-00010.safetensors
Normal file
3
model-00004-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35ce51217f5029c64ed7e928e29b028e25fab752cbd094453430695d9a929004
|
||||
size 5222016045
|
||||
3
model-00005-of-00010.safetensors
Normal file
3
model-00005-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5eb1562adf0c7297d2dc240b1ebd6f99ec4191a5a0dcd5cb1de82a892d170fc8
|
||||
size 5222016025
|
||||
3
model-00006-of-00010.safetensors
Normal file
3
model-00006-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4731687257badc6ca0c4f9d9350cf902ea5376729eca27611a5a320edc8dbb8e
|
||||
size 5117137260
|
||||
3
model-00007-of-00010.safetensors
Normal file
3
model-00007-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:20e60948c370375d2874f2bce7db04fa0b1465e89416d6b0f4b526b05e7eda4a
|
||||
size 5222016027
|
||||
3
model-00008-of-00010.safetensors
Normal file
3
model-00008-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a8e7eb490d4c3922f502f2e1995b3ff7d2f7af268842b21b61398ce4d0220e0a
|
||||
size 5222016039
|
||||
3
model-00009-of-00010.safetensors
Normal file
3
model-00009-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2df07c008b047c51911cc581335f760505616aa2dcd3b52075a29f7ddce773ee
|
||||
size 4341200611
|
||||
3
model-00010-of-00010.safetensors
Normal file
3
model-00010-of-00010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5cf0483af4189194bab770598253da2df0dde722a799da7e71bc94c0d51fa478
|
||||
size 1342177407
|
||||
371
model.safetensors.index.json
Normal file
371
model.safetensors.index.json
Normal file
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 47144806400,
|
||||
"total_parameters": 23572403200
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00010-of-00010.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00010.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00010.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00010.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00010.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00010.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
||||
"model.norm.weight": "model-00009-of-00010.safetensors"
|
||||
}
|
||||
}
|
||||
1032
special_tokens_map.json
Normal file
1032
special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:23ad081f384b2bdb3c97f6e5461dfce52c1174c7328854a55006988f0fef9da7
|
||||
size 17078019
|
||||
9020
tokenizer_config.json
Normal file
9020
tokenizer_config.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user