初始化项目,由ModelHub XC社区提供模型
Model: katanemo/Arch-Agent-32B Source: Original Platform
This commit is contained in:
37
.gitattributes
vendored
Normal file
37
.gitattributes
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
assets/Arch-Agent-32B.png filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
76
LICENSE
Normal file
76
LICENSE
Normal file
@@ -0,0 +1,76 @@
|
||||
# KATANEMO COMMUNITY LICENSE AGREEMENT
|
||||
**Version Release Date:** April 2, 2026
|
||||
|
||||
This KATANEMO COMMUNITY LICENSE AGREEMENT is based on the Llama 3.2 Community License Agreement (https://www.llama.com/llama3_2/license/), which has been adapted for terms specific to the distribution and use of proprietary Katanemo Materials (defined below) provided by DigitalOcean, LLC.
|
||||
|
||||
---
|
||||
|
||||
1.Definitions
|
||||
a. "Agreement": The terms and conditions for use, reproduction, distribution, and modification of the Katanemo Materials set forth herein.
|
||||
b. "Documentation": The specifications, manuals, and documentation accompanying Katanemo Models.
|
||||
c. "Licensee" or "you": The individual or entity entering into this Agreement, including your employer if you are acting on their behalf.
|
||||
d. "Katanemo Model(s)": The foundational large language models and model software and algorithms, including machine-learning model code, trained model weights, inference-enabling code, training-enabling code, fine-tuning enabling code and other elements of the foregoing distributed by DigitalOcean at https://huggingface.co/katanemolabs.
|
||||
e. "Katanemo Materials": Collectively, Katanemo's proprietary models (including Katanemo Models) and Documentation.
|
||||
f. “DigitalOcean” or "we": DigitalOcean, LLC, a Delaware, USA Corporation.
|
||||
|
||||
---
|
||||
|
||||
2.
|
||||
By clicking "I Accept" below or by using or distributing any portion or element of the Katanemo Materials, you agree to be bound by this Agreement.
|
||||
|
||||
---
|
||||
|
||||
3. LICENSE RIGHTS AND REDISTRIBUTION
|
||||
a. Grant of Rights
|
||||
Subject to Section 4, you are granted a non-exclusive, worldwide, non-transferable, and royalty-free license under DigitalOcean’s intellectual property or other rights owned by DigitalOcean embodied in the Katanemo Materials to:
|
||||
- Use, reproduce, distribute, copy, create derivative works of, and make modifications to the Katanemo Materials, and
|
||||
- Create derivative works based on the Katanemo Materials.
|
||||
|
||||
b. Redistribution and Use
|
||||
i. If you distribute the Katanemo Materials or make available the Katanemo Materials (or any derivative works thereof), or a product or service (including another AI model) that contains any of them, you shall:
|
||||
(A) provide a copy of this Agreement with any such Katanemo Materials; and
|
||||
(B) prominently display “Built with DigitalOcean” on a related website, user interface, blogpost, about page, or product documentation. If you use the Katanemo Materials or any outputs or results of the Katanemo Materials to create, train, fine tune, or otherwise improve any AI model, which is distributed or made available, you shall also include “DigitalOcean” at the beginning of any such AI model name.
|
||||
|
||||
ii. If you receive Katanemo Materials, or any derivative works thereof, from a Licensee as part of an integrated end user product, then Section 4 of this Agreement will not apply to you.
|
||||
|
||||
iii. You must retain in all copies of the Katanemo Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Katanemo Models are licensed under the DigitalOcean Community License, Copyright 2026 DigitalOcean, LLC. All Rights Reserved.”
|
||||
|
||||
iv. Your use of the Katanemo Materials must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Acceptable Use Policy for the Katanemo Materials (available at https://www.digitalocean.com/legal/acceptable-use-policy), which is hereby incorporated by reference into this Agreement.
|
||||
|
||||
---
|
||||
|
||||
4. ADDITIONAL COMMERCIAL TERMS
|
||||
|
||||
If you use, reproduce, modify, distribute, and create derivative works from the Katanemo Materials for commercial purposes, you must obtain a separate commercial license from DigitalOcean, which DigitalOcean may grant to you in its sole discretion, and you are not authorized to exercise any of the rights under this Agreement unless or until DigitalOcean otherwise expressly grants you such rights.
|
||||
|
||||
---
|
||||
|
||||
5. Disclaimer of Warranty
|
||||
UNLESS REQUIRED BY APPLICABLE LAW, THE KATANEMO MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, AND DIGITALOCEAN DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE KATANEMO MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE KATANEMO MATERIALS AND ANY OUTPUT AND RESULTS.
|
||||
|
||||
---
|
||||
|
||||
6. Limitation of Liability
|
||||
IN NO EVENT WILL DIGITALOCEAN OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF DIGITALOCEAN OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
|
||||
|
||||
---
|
||||
|
||||
7. Intellectual Property
|
||||
a. Trademarks
|
||||
No trademark licenses are granted under this Agreement, and in connection with the Katanemo Materials, neither DigitalOcean nor you may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Katanemo Materials or as set forth in this Section 7(a). DigitalOcean hereby grants you a limited license to use “Katanemo” (the “Mark”) solely as required to comply with the last sentence of Section 3.b.i. You will comply with DigitalOcean’s brand guidelines as may be provided by DigitalOcean. All goodwill arising out of your use of the Mark will inure to the benefit of DigitalOcean.
|
||||
|
||||
b. Ownership
|
||||
Subject to DigitalOcean’s ownership of Katanemo Materials and derivatives made by or for DigitalOcean, with respect to any derivative works and modifications of the Katanemo Materials that are made by you, as between you and DigitalOcean, you are and will be the owner of such derivative works and modifications.
|
||||
|
||||
c. Litigation
|
||||
If you institute litigation or other proceedings against DigitalOcean or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Katanemo Materials or Katanemo Model outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless DigitalOcean from and against any claim by any third party arising out of or related to your use or distribution of the Katanemo Materials.
|
||||
|
||||
---
|
||||
|
||||
8. Term and Termination
|
||||
The term of this Agreement will commence upon your acceptance of this Agreement or access to the Katanemo Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. DigitalOcean may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Katanemo Materials. Sections 5, 6, 7.b and 9 shall survive the termination of this Agreement.
|
||||
|
||||
---
|
||||
|
||||
9. Governing Law and Jurisdiction
|
||||
This Agreement is governed by the laws of the State of Colorado, USA without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The courts of Denver County, Colorado, shall have exclusive jurisdiction of any dispute arising out of this Agreement.
|
||||
124
README.md
Normal file
124
README.md
Normal file
@@ -0,0 +1,124 @@
|
||||
---
|
||||
license: other
|
||||
license_name: katanemo-research
|
||||
license_link: >-
|
||||
https://huggingface.co/katanemo/Arch-Agent-32B/blob/main/LICENSE
|
||||
base_model:
|
||||
- Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
language:
|
||||
- en
|
||||
pipeline_tag: text-generation
|
||||
library_name: transformers
|
||||
---
|
||||
|
||||
# katanemo/Arch-Agent-32B
|
||||
|
||||
## Overview
|
||||
Arch-Agent is a collection of state-of-the-art (SOTA) LLMs specifically designed for advanced function calling and agent-based applications. Designed to power sophisticated multi-step and multi-turn workflows, Arch-Agent excels at handling complex, multi-step tasks that require intelligent tool selection, adaptive planning, and seamless integration with external APIs and services. Built with a focus on real-world agent deployments, Arch-Agent delivers leading performance in complex scenarios while maintaining reliability and precision across extended function call sequences. Key capabilities include:
|
||||
|
||||
- **Multi-Turn Function Calling**: Maintains contextual continuity across multiple dialogue turns, enabling natural, ongoing conversations with nested or evolving tool use.
|
||||
- **Multi-Step Function Calling**: Plans and executes a sequence of function calls to complete complex tasks. Adapts dynamically based on intermediate results and decomposes goals into sub-tasks.
|
||||
- **Agentic Capabilities**: Advanced decision-making and workflow management for complex agentic tasks with seamless tool coordination and error recovery.
|
||||
|
||||
For more details, including fine-tuning, inference, and deployment, please refer to our [GitHub](https://github.com/katanemo/Arch-Function).
|
||||
|
||||
|
||||
## Performance Benchmarks
|
||||
We evaluate the Katanemo Arch-Agent series on the [Berkeley Function-Calling Leaderboard (BFCL)](https://gorilla.cs.berkeley.edu/leaderboard.html#leaderboard). We compare with commonly used models and the results (as of June 14th, 2025) are shown below.
|
||||
|
||||
<div align="center">
|
||||
<img width="100%" height="auto" src="./assets/Arch-Agent-32B.png">
|
||||
</div>
|
||||
|
||||
> [!NOTE]
|
||||
> For evaluation, we use YaRN scaling to deploy the models for Multi-Turn evaluation, and all Arch-Agent models are evaluated with a context length of 64K.
|
||||
|
||||
## Requirements
|
||||
The code of Arch-Agent-32B has been integrated into the Hugging Face `transformers` library, and we recommend installing the latest version:
|
||||
```bash
|
||||
# Quote the requirement: unquoted, the shell treats ">" as an output
# redirection and the version constraint is silently dropped.
pip install "transformers>=4.51.0"
|
||||
```
|
||||
|
||||
|
||||
## How to use
|
||||
We use the following example to illustrate how to use our model to perform function calling tasks. Please note that our model works best with our provided prompt format. It allows us to extract JSON output that is similar to [OpenAI's function calling](https://platform.openai.com/docs/guides/function-calling).
|
||||
|
||||
|
||||
### Quickstart
|
||||
````python
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
model_name = "katanemo/Arch-Agent-32B"
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
|
||||
TASK_PROMPT = (
|
||||
"You are a helpful assistant designed to assist with the user query by making one or more function calls if needed."
|
||||
"\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
|
||||
"You are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{tool_text}"
|
||||
"\n</tools>\n\nFor each function call, return a json object with function name and arguments within "
|
||||
"""<tool_call></tool_call> XML tags:\n<tool_call>\n{{"name": <function-name>, """
|
||||
""""arguments": <args-json-object>}}\n</tool_call>"""
|
||||
)
|
||||
|
||||
# Define available tools
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "str",
|
||||
"description": "The city and state, e.g. San Francisco, New York",
|
||||
},
|
||||
"unit": {
|
||||
"type": "str",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The unit of temperature to return",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
# Helper function to create the system prompt for our model
|
||||
def format_prompt(tools: List[Dict[str, Any]]):
|
||||
tool_text = "\n".join(
|
||||
[json.dumps(tool["function"], ensure_ascii=False) for tool in tools]
|
||||
)
|
||||
return TASK_PROMPT.format(tool_text=tool_text)
|
||||
|
||||
system_prompt = format_prompt(tools)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": "What is the weather in Seattle?"},
|
||||
]
|
||||
|
||||
model_inputs = tokenizer.apply_chat_template(
|
||||
messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
|
||||
).to(model.device)
|
||||
|
||||
generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
|
||||
generated_ids = [
|
||||
output_ids[len(input_ids) :]
|
||||
for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
||||
]
|
||||
|
||||
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||
print(response)
|
||||
````
|
||||
|
||||
# License
|
||||
The Arch-Agent collection is distributed under the [Katanemo license](https://huggingface.co/katanemo/Arch-Agent-32B/blob/main/LICENSE).
|
||||
3
assets/Arch-Agent-32B.png
Normal file
3
assets/Arch-Agent-32B.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5a8538fc16e3dacae410fd2b2e72e650baea68e867fef83c7821cca29c9467eb
|
||||
size 115025
|
||||
54
chat_template.jinja
Normal file
54
chat_template.jinja
Normal file
@@ -0,0 +1,54 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- messages[0]['content'] }}
|
||||
{%- else %}
|
||||
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
||||
{%- endif %}
|
||||
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{{- '<|im_start|>' + message.role }}
|
||||
{%- if message.content %}
|
||||
{{- '\n' + message.content }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
|
||||
28
config.json
Normal file
28
config.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen2ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"eos_token_id": 151645,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 5120,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 27648,
|
||||
"max_position_embeddings": 32768,
|
||||
"max_window_layers": 64,
|
||||
"model_type": "qwen2",
|
||||
"num_attention_heads": 40,
|
||||
"num_hidden_layers": 64,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": 131072,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.52.3",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 152064
|
||||
}
|
||||
14
generation_config.json
Normal file
14
generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 151643,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"repetition_penalty": 1.05,
|
||||
"temperature": 0.7,
|
||||
"top_k": 20,
|
||||
"top_p": 0.8,
|
||||
"transformers_version": "4.52.3"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00014.safetensors
Normal file
3
model-00001-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3ecf8129243de6ffd6edd4bc76b29f6525d4ece41b26612c4ebbefecf4660706
|
||||
size 4891730992
|
||||
3
model-00002-of-00014.safetensors
Normal file
3
model-00002-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:056c66ea71793808a7d526702702960cbe59be76ffba64248503be76bab06288
|
||||
size 4876059352
|
||||
3
model-00003-of-00014.safetensors
Normal file
3
model-00003-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:06bb84d6fc56c92273185ce9db84c4798be89d5139b9a0a756ac6cddf09847d1
|
||||
size 4876059384
|
||||
3
model-00004-of-00014.safetensors
Normal file
3
model-00004-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:49f0dafaddd9ede06da1083a9bb6ac79135de56c51023fed5e15b7f3520d7a65
|
||||
size 4876059416
|
||||
3
model-00005-of-00014.safetensors
Normal file
3
model-00005-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:da520aff9a10d109b54d1a4567a2e4e32f333a817202dfefab88e720044ad68c
|
||||
size 4876059416
|
||||
3
model-00006-of-00014.safetensors
Normal file
3
model-00006-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:471f045fe1ae3e6fbba36c8f4eb401546d4851077a0c359bb161c55588926762
|
||||
size 4876059416
|
||||
3
model-00007-of-00014.safetensors
Normal file
3
model-00007-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:11428c192ebf480150a6a39f766177db2076aa7bdab9734b3761aa46897ffbb7
|
||||
size 4876059416
|
||||
3
model-00008-of-00014.safetensors
Normal file
3
model-00008-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ff56a9f0281af61569cc0a83468af1dd1275272976a6e24b1725e5964ae13113
|
||||
size 4876059416
|
||||
3
model-00009-of-00014.safetensors
Normal file
3
model-00009-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8717e668e3a336b41a7f99acf9877528776eaabb43ac4d5975a2af12394944d9
|
||||
size 4876059416
|
||||
3
model-00010-of-00014.safetensors
Normal file
3
model-00010-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aa13c47081c361ae764935c1cfc99400b73c654a4e7c90c03b413405722045d0
|
||||
size 4876059416
|
||||
3
model-00011-of-00014.safetensors
Normal file
3
model-00011-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a20b20f27fc9126696f0d4d2326d94cd9573ed2acfe078103d37d3a49934bf17
|
||||
size 4876059416
|
||||
3
model-00012-of-00014.safetensors
Normal file
3
model-00012-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fdefc9c7a18934d3431bcf6bf10dfa07df3c19523a12b756b56384893ff319db
|
||||
size 4876059416
|
||||
3
model-00013-of-00014.safetensors
Normal file
3
model-00013-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8b8a76d2d76b4a35c377f276c17a667e8aa8159141ae93b0ff7b341e12cd394c
|
||||
size 4876059416
|
||||
3
model-00014-of-00014.safetensors
Normal file
3
model-00014-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b3fbb01bf2e9d0b38ec9ecc0a3b5c8a034ce35f9c92f021460edbbcfcae92a14
|
||||
size 2123397800
|
||||
778
model.safetensors.index.json
Normal file
778
model.safetensors.index.json
Normal file
@@ -0,0 +1,778 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 65527752704
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00014-of-00014.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.bias": "model-00004-of-00014.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.bias": "model-00005-of-00014.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.bias": "model-00006-of-00014.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00014.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.bias": "model-00007-of-00014.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.36.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.37.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.38.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.38.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.38.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.bias": "model-00008-of-00014.safetensors",
|
||||
"model.layers.38.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
||||
"model.layers.39.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.39.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.40.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.k_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.q_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.v_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.40.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.k_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.q_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.v_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.41.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.k_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.q_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.v_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.42.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.43.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.43.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.43.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.k_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.q_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.v_proj.bias": "model-00009-of-00014.safetensors",
|
||||
"model.layers.43.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
||||
"model.layers.44.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.k_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.q_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.v_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.44.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.k_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.q_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.v_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.45.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.k_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.q_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.v_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.46.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.k_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.q_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.v_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.47.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.48.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.48.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.48.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.k_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.q_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.v_proj.bias": "model-00010-of-00014.safetensors",
|
||||
"model.layers.48.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
||||
"model.layers.49.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.k_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.q_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.v_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.49.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.50.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.k_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.q_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.v_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.50.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.k_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.q_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.v_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.51.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.k_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.q_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.v_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.52.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.53.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.53.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.53.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.k_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.q_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.v_proj.bias": "model-00011-of-00014.safetensors",
|
||||
"model.layers.53.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
||||
"model.layers.54.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.k_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.q_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.v_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.54.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.k_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.q_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.v_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.55.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.k_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.q_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.v_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.56.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.k_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.q_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.v_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.58.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.58.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.58.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.k_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.q_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.v_proj.bias": "model-00012-of-00014.safetensors",
|
||||
"model.layers.58.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
||||
"model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.k_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.q_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.v_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.59.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.60.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.k_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.q_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.v_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.60.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.k_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.q_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.v_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.61.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.k_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.q_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.v_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.62.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
||||
"model.layers.63.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
||||
"model.layers.63.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
||||
"model.layers.63.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.k_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.q_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.v_proj.bias": "model-00013-of-00014.safetensors",
|
||||
"model.layers.63.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.bias": "model-00002-of-00014.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.bias": "model-00003-of-00014.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
||||
"model.norm.weight": "model-00014-of-00014.safetensors"
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
||||
size 11421896
|
||||
208
tokenizer_config.json
Normal file
208
tokenizer_config.json
Normal file
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user