From 6aad305542ffecaf21f8eaa30d663749fee87f0d Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 8 Jun 2026 16:14:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: baichuan-inc/Baichuan-M2-32B-GPTQ-Int4 Source: Original Platform --- .gitattributes | 51 + LICENSE | 202 +++ README.md | 138 ++ added_tokens.json | 28 + config.json | 40 + configuration.json | 1 + draft/config.json | 29 + draft/pytorch_model.bin | 3 + draft/qwen2.py | 641 +++++++++ generation_config.json | 13 + merges.txt | 3 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 2122 ++++++++++++++++++++++++++++++ quantize_config.json | 12 + special_tokens_map.json | 31 + tokenizer.json | 3 + tokenizer_config.json | 240 ++++ vocab.json | 3 + 21 files changed, 3572 insertions(+) create mode 100644 .gitattributes create mode 100644 LICENSE create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 config.json create mode 100644 configuration.json create mode 100644 draft/config.json create mode 100644 draft/pytorch_model.bin create mode 100644 draft/qwen2.py create mode 100644 generation_config.json create mode 100644 merges.txt create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 quantize_config.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 vocab.json diff --git a/.gitattributes b/.gitattributes 
new file mode 100644 index 0000000..9737fd8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,51 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text 
+*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +merges.txt filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6634c8c --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 Alibaba Cloud + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b940dd7 --- /dev/null +++ b/README.md @@ -0,0 +1,138 @@ +--- +license: apache-2.0 +tags: +- chat +library_name: transformers +language: +- en +- zh +base_model: +- Qwen/Qwen2.5-32B +--- +# Baichuan-M2-32B-GPTQ-Int4 + +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Model-yellow)](https://huggingface.co/baichuan-inc/Baichuan-M2-32B) +[![M2 GPTQ-4bit](https://img.shields.io/badge/🤗%20M2%20GPTQ--4bit-Model-orange)](https://huggingface.co/baichuan-inc/Baichuan-M2-32B-GPTQ-Int4) +[![Huawei Ascend 8bit](https://img.shields.io/badge/✨%20Huawei%20Ascend%208bit-Model-green)](https://modelers.cn/models/Baichuan/Baichuan-M2-32B-W8A8) + +## 🌟 Model Overview + +Baichuan-M2-32B is Baichuan AI's medical-enhanced reasoning model, the second medical model released by Baichuan. Designed for real-world medical reasoning tasks, this model builds upon Qwen2.5-32B with an innovative Large Verifier System. Through domain-specific fine-tuning on real-world medical questions, it achieves breakthrough medical performance while maintaining strong general capabilities. 
+ +**Model Features:** + +Baichuan-M2 incorporates three core technical innovations: First, through the **Large Verifier System**, it combines medical scenario characteristics to design a comprehensive medical verification framework, including patient simulators and multi-dimensional verification mechanisms; second, through **medical domain adaptation enhancement** via Mid-Training, it achieves lightweight and efficient medical domain adaptation while preserving general capabilities; finally, it employs a **multi-stage reinforcement learning** strategy, decomposing complex RL tasks into hierarchical training stages to progressively enhance the model's medical knowledge, reasoning, and patient interaction capabilities. + +**Core Highlights:** +- 🏆 **World's Leading Open-Source Medical Model**: Outperforms all open-source models and many proprietary models on HealthBench, achieving medical capabilities closest to GPT-5 +- 🧠 **Doctor-Thinking Alignment**: Trained on real clinical cases and patient simulators, with clinical diagnostic thinking and robust patient interaction capabilities +- ⚡ **Efficient Deployment**: Supports 4-bit quantization for single-RTX4090 deployment, with 58.5% higher token throughput in MTP version for single-user scenarios + +## 📊 Performance Metrics + +### HealthBench Scores + +| Model Name | HealthBench | HealthBench-Hard | HealthBench-Consensus | +|------------|-------------|------------------|-----------------------| +| Baichuan-M2 | 60.1 | 34.7 | 91.5 | +| gpt-oss-120b | 57.6 | 30 | 90 | +| Qwen3-235B-A22B-Thinking-2507 | 55.2 | 25.9 | 90.6 | +| Deepseek-R1-0528 | 53.6 | 22.6 | 91.5 | +| GLM-4.5 | 47.8 | 18.7 | 85.3 | +| Kimi-K2 | 43 | 10.7 | 90.9 | +| gpt-oss-20b | 42.5 | 10.8 | 82.6 | + +### General Performance + +| Benchmark | Baichuan-M2-32B | Qwen3-32B (Thinking) | +|-----------|-----------------|-----------| +| AIME24 | 83.4 | 81.4 | +| AIME25 | 72.9 | 72.9 | +| Arena-Hard-v2.0 | 45.8 | 44.5 | +| CFBench | 77.6 | 75.7 | +| 
WritingBench | 8.56 | 7.90 | + +*Note: AIME uses max_tokens=64k, others use 32k; temperature=0.6 for all tests.* + +## 🔧 Technical Features + +📗 **Technical Blog**: [Blog - Baichuan-M2](https://www.baichuan-ai.com/blog/baichuan-M2) + +📑 **Technical Report**: [Arxiv - Baichuan-M2](https://arxiv.org/abs/2509.02208) + +### Large Verifier System +- **Patient Simulator**: Virtual patient system based on real clinical cases +- **Multi-Dimensional Verification**: 8 dimensions including medical accuracy, response completeness, and follow-up awareness +- **Dynamic Scoring**: Real-time generation of adaptive evaluation criteria for complex clinical scenarios +### Medical Domain Adaptation +- **Mid-Training**: Medical knowledge injection while preserving general capabilities +- **Reinforcement Learning**: Multi-stage RL strategy optimization +- **General-Specialized Balance**: Carefully balanced medical, general, and mathematical composite training data + +## ⚙️ Quick Start + +For deployment, you can use `sglang>=0.4.6.post1` or `vllm>=0.9.0` to create an OpenAI-compatible API endpoint: +- SGLang: + ```shell + python -m sglang.launch_server --model-path baichuan-inc/Baichuan-M2-32B-GPTQ-Int4 --reasoning-parser qwen3 + ``` +To turn on kv cache FP8 quantization: + ```shell + python -m sglang.launch_server --model-path baichuan-inc/Baichuan-M2-32B-GPTQ-Int4 --reasoning-parser qwen3 --kv-cache-dtype fp8_e4m3 --attention-backend flashinfer + ``` + +- vLLM: + ```shell + vllm serve baichuan-inc/Baichuan-M2-32B-GPTQ-Int4 --reasoning-parser qwen3 + ``` +To turn on kv cache FP8 quantization: + ```shell + vllm serve baichuan-inc/Baichuan-M2-32B-GPTQ-Int4 --reasoning-parser qwen3 --kv_cache_dtype fp8_e4m3 + ``` + +## MTP inference with SGLang + +1. Replace the qwen2.py file in the sglang installation directory with draft/qwen2.py. +2. 
Launch sglang: +``` +python3 -m sglang.launch_server \ +--model Baichuan-M2-32B-GPTQ-Int4 \ +--speculative-algorithm EAGLE3 \ +--speculative-draft-model-path Baichuan-M2-32B-GPTQ-Int4/draft \ +--speculative-num-steps 6 \ +--speculative-eagle-topk 10 \ +--speculative-num-draft-tokens 32 \ +--mem-fraction 0.9 \ +--cuda-graph-max-bs 2 \ +--reasoning-parser qwen3 \ +--dtype bfloat16 +``` + +## ⚠️ Usage Notices +1. **Medical Disclaimer**: For research and reference only; cannot replace professional medical diagnosis or treatment +2. **Intended Use Cases**: Medical education, health consultation, clinical decision support +3. **Safe Use**: Recommended under guidance of medical professionals + +## 📄 License +Licensed under the [Apache License 2.0](LICENSE). Research and commercial use permitted. + +## 🤝 Acknowledgements +- Base Model: Qwen2.5-32B +- Training Framework: verl +- Inference Engines: vLLM, SGLang +- Quantization: AutoRound, GPTQ + +Thank you to the open-source community. We commit to continuous contribution and advancement of healthcare AI. + +## 📞 Contact Us +- Resources: [Baichuan AI Website](https://www.baichuan-ai.com) +- Technical Support: [GitHub](https://github.com/baichuan-inc) + +--- +
+ +**Empowering Healthcare with AI, Making Health Accessible to All** + +
+ diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "</think>": 151668, + "</tool_call>": 151658, + "</tool_response>": 151666, + "<think>": 151667, + "<tool_call>": 151657, + "<tool_response>": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..af72176 --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 27648, + "max_position_embeddings": 131072, + "max_window_layers": 64, + "model_type": "qwen2", + "num_attention_heads": 40, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "quantization_config": { + "autoround_version": "0.5.1", + "bits": 4, + "damp_percent": 0.01, + "data_type": "int", + "desc_act": false, + "group_size": 128, + "nsamples": 512, + "quant_method": "gptq", + "sym": true, + "true_sequential": false + }, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.52.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- 
/dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/draft/config.json b/draft/config.json new file mode 100644 index 0000000..b1a074a --- /dev/null +++ b/draft/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLMEagle3" + ], + "attention_dropout": 0.0, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 27648, + "max_position_embeddings": 32768, + "max_window_layers": 64, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 1, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064, + "draft_vocab_size": 32000 +} diff --git a/draft/pytorch_model.bin b/draft/pytorch_model.bin new file mode 100644 index 0000000..9a8bddc --- /dev/null +++ b/draft/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80f49be625de0c703ae279805a764b07377bae6d2a31468e75e12fbf2c17298 +size 1534011812 diff --git a/draft/qwen2.py b/draft/qwen2.py new file mode 100644 index 0000000..4b31f24 --- /dev/null +++ b/draft/qwen2.py @@ -0,0 +1,641 @@ +# Copyright 2023-2024 SGLang Team +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Adapted from llama2.py +# Modify details for the adaptation of Qwen2 model. +"""Inference-only Qwen2 model compatible with HuggingFace weights.""" +import logging +from typing import Any, Dict, Iterable, Optional, Tuple, Union, List + +import torch +from torch import nn + +from sglang.srt.distributed import ( + get_pp_group, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, +) +from sglang.srt.layers.activation import SiluAndMul +from sglang.srt.layers.layernorm import RMSNorm +from sglang.srt.layers.linear import ( + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear, +) +from sglang.srt.layers.logits_processor import LogitsProcessor +from sglang.srt.layers.pooler import Pooler, PoolingType +from sglang.srt.layers.quantization.base_config import QuantizationConfig +from sglang.srt.layers.radix_attention import RadixAttention +from sglang.srt.layers.rotary_embedding import get_rope +from sglang.srt.layers.utils import PPMissingLayer, get_layer_id +from sglang.srt.layers.vocab_parallel_embedding import ( + ParallelLMHead, + VocabParallelEmbedding, +) +from sglang.srt.managers.schedule_batch import global_server_args_dict +from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors +from sglang.srt.model_loader.weight_utils import ( + default_weight_loader, + kv_cache_scales_loader, +) +from sglang.srt.utils import add_prefix, make_layers + +Qwen2Config = None + + +logger = logging.getLogger(__name__) + + +class Qwen2MLP(nn.Module): + def __init__( + self, + hidden_size: int, + intermediate_size: int, + hidden_act: str, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.gate_up_proj = MergedColumnParallelLinear( + hidden_size, + [intermediate_size] * 
2, + bias=False, + quant_config=quant_config, + prefix=add_prefix("gate_up_proj", prefix), + ) + self.down_proj = RowParallelLinear( + intermediate_size, + hidden_size, + bias=False, + quant_config=quant_config, + prefix=add_prefix("down_proj", prefix), + ) + if hidden_act != "silu": + raise ValueError( + f"Unsupported activation: {hidden_act}. " + "Only silu is supported for now." + ) + self.act_fn = SiluAndMul() + + def forward(self, x): + gate_up, _ = self.gate_up_proj(x) + x = self.act_fn(gate_up) + x, _ = self.down_proj(x) + return x + + +class Qwen2Attention(nn.Module): + def __init__( + self, + hidden_size: int, + num_heads: int, + num_kv_heads: int, + head_dim: Optional[int] = None, + layer_id: int = 0, + rope_theta: float = 1000000, + rope_scaling: Optional[Dict[str, Any]] = None, + max_position_embeddings: int = 32768, + quant_config: Optional[QuantizationConfig] = None, + dual_chunk_attention_config: Optional[dict[str, Any]] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.hidden_size = hidden_size + tp_size = get_tensor_model_parallel_world_size() + self.total_num_heads = num_heads + assert self.total_num_heads % tp_size == 0 + self.num_heads = self.total_num_heads // tp_size + self.total_num_kv_heads = num_kv_heads + if self.total_num_kv_heads >= tp_size: + # Number of KV heads is greater than TP size, so we partition + # the KV heads across multiple tensor parallel GPUs. + assert self.total_num_kv_heads % tp_size == 0 + else: + # Number of KV heads is less than TP size, so we replicate + # the KV heads across multiple tensor parallel GPUs. 
+ assert tp_size % self.total_num_kv_heads == 0 + self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) + if head_dim is not None: + self.head_dim = head_dim + else: + self.head_dim = hidden_size // self.total_num_heads + self.q_size = self.num_heads * self.head_dim + self.kv_size = self.num_kv_heads * self.head_dim + self.scaling = self.head_dim**-0.5 + self.rope_theta = rope_theta + self.max_position_embeddings = max_position_embeddings + + self.qkv_proj = QKVParallelLinear( + hidden_size, + self.head_dim, + self.total_num_heads, + self.total_num_kv_heads, + bias=True, + quant_config=quant_config, + prefix=add_prefix("qkv_proj", prefix), + ) + self.o_proj = RowParallelLinear( + self.total_num_heads * self.head_dim, + hidden_size, + bias=False, + quant_config=quant_config, + prefix=add_prefix("o_proj", prefix), + ) + + self.rotary_emb = get_rope( + self.head_dim, + rotary_dim=self.head_dim, + max_position=max_position_embeddings, + base=rope_theta, + rope_scaling=rope_scaling, + dual_chunk_attention_config=dual_chunk_attention_config, + ) + self.attn = RadixAttention( + self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads, + layer_id=layer_id, + quant_config=quant_config, + prefix=add_prefix("attn", prefix), + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + forward_batch: ForwardBatch, + ) -> torch.Tensor: + qkv, _ = self.qkv_proj(hidden_states) + q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) + q, k = self.rotary_emb(positions, q, k) + attn_output = self.attn(q, k, v, forward_batch) + output, _ = self.o_proj(attn_output) + return output + + +class Qwen2DecoderLayer(nn.Module): + def __init__( + self, + config: Qwen2Config, + layer_id: int = 0, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + alt_stream: Optional[torch.cuda.Stream] = None, + ) -> None: + super().__init__() + self.hidden_size = config.hidden_size + rope_theta = 
getattr(config, "rope_theta", 1000000) + rope_scaling = getattr(config, "rope_scaling", None) + max_position_embeddings = getattr(config, "max_position_embeddings", 32768) + head_dim = getattr(config, "head_dim", None) + dual_chunk_attention_config = getattr( + config, "dual_chunk_attention_config", None + ) + self.self_attn = Qwen2Attention( + hidden_size=self.hidden_size, + num_heads=config.num_attention_heads, + num_kv_heads=config.num_key_value_heads, + head_dim=head_dim, + layer_id=layer_id, + rope_theta=rope_theta, + rope_scaling=rope_scaling, + max_position_embeddings=max_position_embeddings, + quant_config=quant_config, + dual_chunk_attention_config=dual_chunk_attention_config, + prefix=add_prefix("self_attn", prefix), + ) + self.mlp = Qwen2MLP( + hidden_size=self.hidden_size, + intermediate_size=config.intermediate_size, + hidden_act=config.hidden_act, + quant_config=quant_config, + prefix=add_prefix("mlp", prefix), + ) + self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.post_attention_layernorm = RMSNorm( + config.hidden_size, eps=config.rms_norm_eps + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + forward_batch: ForwardBatch, + residual: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Self Attention + if residual is None: + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + else: + hidden_states, residual = self.input_layernorm(hidden_states, residual) + hidden_states = self.self_attn( + positions=positions, + hidden_states=hidden_states, + forward_batch=forward_batch, + ) + + # Fully Connected + hidden_states, residual = self.post_attention_layernorm(hidden_states, residual) + hidden_states = self.mlp(hidden_states) + return hidden_states, residual + + +class Qwen2Model(nn.Module): + def __init__( + self, + config: Qwen2Config, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + decoder_layer_type: 
type[nn.Module] = Qwen2DecoderLayer, + alt_stream: Optional[torch.cuda.Stream] = None, + ) -> None: + super().__init__() + self.config = config + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + self.pp_group = get_pp_group() + + if self.pp_group.is_first_rank: + self.embed_tokens = VocabParallelEmbedding( + config.vocab_size, + config.hidden_size, + quant_config=quant_config, + enable_tp=not global_server_args_dict["enable_dp_attention"], + prefix=add_prefix("embed_tokens", prefix), + ) + else: + self.embed_tokens = PPMissingLayer() + + # Use the provided decoder layer type or default to Qwen2DecoderLayer + decoder_layer_type = decoder_layer_type or Qwen2DecoderLayer + self.layers, self.start_layer, self.end_layer = make_layers( + config.num_hidden_layers, + lambda idx, prefix: decoder_layer_type( + layer_id=idx, + config=config, + quant_config=quant_config, + prefix=prefix, + alt_stream=alt_stream, + ), + pp_rank=self.pp_group.rank_in_group, + pp_size=self.pp_group.world_size, + prefix=add_prefix("layers", prefix), + ) + if self.pp_group.is_last_rank: + self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + else: + self.norm = PPMissingLayer(return_tuple=True) + + # For EAGLE3 support + self.layers_to_capture = [] + + def get_input_embedding(self, input_ids: torch.Tensor) -> torch.Tensor: + if hasattr(self.config, "scale_emb"): + return self.get_input_embeddings()(input_ids) * self.config.scale_emb + else: + return self.get_input_embeddings()(input_ids) + + def get_input_embeddings(self) -> nn.Embedding: + return self.embed_tokens + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + forward_batch: ForwardBatch, + input_embeds: torch.Tensor = None, + pp_proxy_tensors: Optional[PPProxyTensors] = None, + ) -> Union[torch.Tensor, PPProxyTensors]: + if self.pp_group.is_first_rank: + if input_embeds is None: + hidden_states = self.embed_tokens(input_ids) + else: + hidden_states = input_embeds + 
residual = None + else: + assert pp_proxy_tensors is not None + hidden_states = pp_proxy_tensors["hidden_states"] + residual = pp_proxy_tensors["residual"] + + aux_hidden_states = [] + for i in range(self.start_layer, self.end_layer): + if i in self.layers_to_capture: + aux_hidden_states.append( + hidden_states + residual if residual is not None else hidden_states + ) + layer = self.layers[i] + hidden_states, residual = layer( + positions, + hidden_states, + forward_batch, + residual, + ) + if not self.pp_group.is_last_rank: + return PPProxyTensors( + { + "hidden_states": hidden_states, + "residual": residual, + } + ) + else: + if hidden_states.shape[0] != 0: + if residual is None: + hidden_states = self.norm(hidden_states) + else: + hidden_states, _ = self.norm(hidden_states, residual) + + if len(aux_hidden_states) == 0: + return hidden_states + + return hidden_states, aux_hidden_states + + # If this function is called, it should always initialize KV cache scale + # factors (or else raise an exception). Thus, handled exceptions should + # make sure to leave KV cache scale factors in a known good (dummy) state + def load_kv_cache_scales(self, quantization_param_path: str) -> None: + tp_size = get_tensor_model_parallel_world_size() + tp_rank = get_tensor_model_parallel_rank() + for layer_idx, scaling_factor in kv_cache_scales_loader( + quantization_param_path, + tp_rank, + tp_size, + self.config.num_hidden_layers, + self.config.__class__.model_type, + ): + if not isinstance(self.layers[layer_idx], nn.Identity): + layer_self_attn = self.layers[layer_idx].self_attn + if hasattr(layer_self_attn.attn, "k_scale"): + layer_self_attn.attn.k_scale = scaling_factor + layer_self_attn.attn.v_scale = scaling_factor + else: + raise RuntimeError( + "Self attention has no KV cache scaling " "factor attribute!" 
+ ) + + +class Qwen2ForCausalLM(nn.Module): + # BitandBytes specific attributes + default_bitsandbytes_target_modules = [ + ".gate_proj.", + ".down_proj.", + ".up_proj.", + ".q_proj.", + ".k_proj.", + ".v_proj.", + ".o_proj.", + ] + bitsandbytes_stacked_params_mapping = { + # shard_name, weight_name, index + "q_proj": ("qkv_proj", 0), + "k_proj": ("qkv_proj", 1), + "v_proj": ("qkv_proj", 2), + "gate_proj": ("gate_up_proj", 0), + "up_proj": ("gate_up_proj", 1), + } + + def __init__( + self, + config: Qwen2Config, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.pp_group = get_pp_group() + self.config = config + self.quant_config = quant_config + self.model = Qwen2Model( + config, quant_config=quant_config, prefix=add_prefix("model", prefix) + ) + self.capture_aux_hidden_states = False + + # handle the lm head on different pp ranks + if self.pp_group.is_last_rank: + if self.pp_group.world_size == 1 and config.tie_word_embeddings: + self.lm_head = self.model.embed_tokens + else: + self.lm_head = ParallelLMHead( + config.vocab_size, + config.hidden_size, + quant_config=quant_config, + prefix=add_prefix("lm_head", prefix), + ) + else: + # ranks other than the last rank will have a placeholder layer + self.lm_head = PPMissingLayer() + + # perform weight tying for PP + if self.pp_group.world_size > 1 and config.tie_word_embeddings: + if self.pp_group.is_first_rank: + self.pp_group.send( + self.model.embed_tokens.weight, dst=self.pp_group.last_rank + ) + else: + emb_token_weight = self.pp_group.recv( + size=(config.vocab_size, config.hidden_size), + dtype=next(self.model.parameters()).dtype, + src=self.pp_group.first_rank, + ) + self.lm_head.weight.copy_(emb_token_weight) + + self.logits_processor = LogitsProcessor(config) + self.pooler = Pooler(pooling_type=PoolingType.LAST, normalize=True) + + def get_input_embedding(self, input_ids: torch.Tensor) -> torch.Tensor: + return 
self.model.get_input_embedding(input_ids) + + def get_input_embeddings(self) -> nn.Embedding: + return self.model.embed_tokens + + @torch.no_grad() + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + forward_batch: ForwardBatch, + input_embeds: torch.Tensor = None, + get_embedding: bool = False, + pp_proxy_tensors: Optional[PPProxyTensors] = None, + ) -> torch.Tensor: + hidden_states = self.model( + input_ids, + positions, + forward_batch, + input_embeds, + pp_proxy_tensors=pp_proxy_tensors, + ) + aux_hidden_states = None + if self.capture_aux_hidden_states: + hidden_states, aux_hidden_states = hidden_states + + if self.pp_group.is_last_rank: + if not get_embedding: + return self.logits_processor( + input_ids, hidden_states, self.lm_head, forward_batch, aux_hidden_states + ) + else: + return self.pooler(hidden_states, forward_batch) + else: + return hidden_states + + @torch.no_grad() + def forward_split_prefill( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + forward_batch: ForwardBatch, + split_interval: Tuple[int, int], # [start, end) 0-based + input_embeds: torch.Tensor = None, + ): + start, end = split_interval + # embed + if start == 0: + if input_embeds is None: + forward_batch.hidden_states = self.model.embed_tokens(input_ids) + else: + forward_batch.hidden_states = input_embeds + # decoder layer + for i in range(start, end): + layer = self.model.layers[i] + forward_batch.hidden_states, forward_batch.residual = layer( + positions, + forward_batch.hidden_states, + forward_batch, + forward_batch.residual, + ) + + if end == self.model.config.num_hidden_layers: + # norm + hidden_states, _ = self.model.norm( + forward_batch.hidden_states, forward_batch.residual + ) + forward_batch.hidden_states = hidden_states + # logits process + result = self.logits_processor( + input_ids, forward_batch.hidden_states, self.lm_head, forward_batch + ) + else: + result = None + + return result + + @property + def start_layer(self): + 
return self.model.start_layer + + @property + def end_layer(self): + return self.model.end_layer + + def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("qkv_proj", "q_proj", "q"), + ("qkv_proj", "k_proj", "k"), + ("qkv_proj", "v_proj", "v"), + ("gate_up_proj", "gate_proj", 0), + ("gate_up_proj", "up_proj", 1), + ] + + params_dict = dict(self.named_parameters()) + for name, loaded_weight in weights: + layer_id = get_layer_id(name) + if ( + layer_id is not None + and hasattr(self.model, "start_layer") + and ( + layer_id < self.model.start_layer + or layer_id >= self.model.end_layer + ) + ): + continue + + if "rotary_emb.inv_freq" in name or "projector" in name: + continue + if "rotary_emb.cos_cached" in name or "rotary_emb.sin_cached" in name: + # Models trained using ColossalAI may include these tensors in + # the checkpoint. Skip them. + continue + if self.config.tie_word_embeddings and "lm_head.weight" in name: + if self.pp_group.world_size > 1 and self.pp_group.is_last_rank: + # Handle pp weight tying here + # find the embed_tokens.weight in the weights + embed_token_weights = next( + filter(lambda x: x[0] == "model.embed_tokens.weight", weights) + )[1] + loaded_weight = embed_token_weights + else: + continue + if name.startswith("model.vision_tower") and name not in params_dict: + continue + + for param_name, weight_name, shard_id in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + if name not in params_dict: + continue + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + # Skip loading extra bias for GPTQ models. 
+ if name.endswith(".bias") and name not in params_dict: + continue + + if name in params_dict.keys(): + param = params_dict[name] + weight_loader = getattr( + param, "weight_loader", default_weight_loader + ) + weight_loader(param, loaded_weight) + else: + logger.warning(f"Parameter {name} not found in params_dict") + + def get_embed_and_head(self): + return self.model.embed_tokens.weight, self.lm_head.weight + + def set_embed_and_head(self, embed, head): + del self.model.embed_tokens.weight + del self.lm_head.weight + self.model.embed_tokens.weight = embed + self.lm_head.weight = head + torch.cuda.empty_cache() + torch.cuda.synchronize() + + def load_kv_cache_scales(self, quantization_param_path: str) -> None: + self.model.load_kv_cache_scales(quantization_param_path) + + def set_eagle3_layers_to_capture(self, layer_ids: Optional[List[int]] = None): + if not self.pp_group.is_last_rank: + return + + self.capture_aux_hidden_states = True + if layer_ids is None: + num_layers = self.config.num_hidden_layers + self.model.layers_to_capture = [ + 2, + num_layers // 2, + num_layers - 3, + ] # Specific layers for EAGLE3 support + else: + self.model.layers_to_capture = [val + 1 for val in layer_ids] + + +EntryClass = Qwen2ForCausalLM diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..e4f1d31 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.51.3" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 
0000000..77ebec8 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1963a73fe18d28e4cc7933c106599202f57a9e78f7b34ff7cdba463512d77d8c +size 4960142856 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..f7318db --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:464ef6d2595f2c385002554fcaf076446bdb389f5bba9aabef52b8d4dae23748 +size 4998217992 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..b20d761 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea4a19e13f23d0edade92235785164c85eb08e5d5dc85cb001eba070a50a8372 +size 4965322672 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..5279e7c --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e7e79337525b3df05c3d3fdbf467a467ae97d9730e42c068068397fbbf9b1c +size 4420301592 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..9e234e1 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2122 @@ +{ + "metadata": { + "total_size": 19343747072 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + 
"model.layers.0.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + 
"model.layers.0.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + 
"model.layers.1.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", 
+ "model.layers.11.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", 
+ "model.layers.11.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + 
"model.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.13.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + 
"model.layers.13.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", 
+ "model.layers.14.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.qzeros": 
"model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.bias": 
"model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.g_idx": 
"model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.qzeros": 
"model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + 
"model.layers.17.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + 
"model.layers.18.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + 
"model.layers.19.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + 
"model.layers.2.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + 
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + 
"model.layers.20.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + 
"model.layers.21.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + 
"model.layers.22.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.23.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.qweight": 
"model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.qzeros": 
"model-00002-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.up_proj.g_idx": 
"model-00002-of-00004.safetensors", + "model.layers.25.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.26.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + 
"model.layers.26.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + 
"model.layers.27.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + 
"model.layers.28.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.down_proj.g_idx": 
"model-00002-of-00004.safetensors", + "model.layers.29.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.qzeros": 
"model-00002-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.scales": 
"model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.up_proj.qweight": 
"model-00002-of-00004.safetensors", + "model.layers.30.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + 
"model.layers.31.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + 
"model.layers.31.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.down_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.down_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.down_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.down_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.up_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.up_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.up_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.mlp.up_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + 
"model.layers.32.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + 
"model.layers.33.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.g_idx": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.qweight": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.qzeros": "model-00002-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.scales": "model-00002-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.qweight": 
"model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.scales": 
"model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.g_idx": 
"model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.mlp.up_proj.qzeros": 
"model-00003-of-00004.safetensors", + "model.layers.36.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + 
"model.layers.37.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + 
"model.layers.37.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.37.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + 
"model.layers.38.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + 
"model.layers.39.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.qzeros": 
"model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.bias": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.40.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.o_proj.qweight": 
"model-00003-of-00004.safetensors", + "model.layers.40.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.mlp.up_proj.scales": 
"model-00003-of-00004.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + 
"model.layers.42.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + 
"model.layers.42.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.42.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + 
"model.layers.43.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.43.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + 
"model.layers.44.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.44.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.down_proj.scales": 
"model-00003-of-00004.safetensors", + "model.layers.45.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.v_proj.g_idx": 
"model-00003-of-00004.safetensors", + "model.layers.45.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.45.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.o_proj.qzeros": 
"model-00003-of-00004.safetensors", + "model.layers.46.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.46.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.post_attention_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.47.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + 
"model.layers.48.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + 
"model.layers.48.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.48.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + 
"model.layers.49.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.49.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.50.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + 
"model.layers.50.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + 
"model.layers.50.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.50.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.down_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.down_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.down_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.down_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + 
"model.layers.51.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.51.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.52.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.52.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.52.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.52.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.52.mlp.gate_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.gate_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.gate_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.gate_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.up_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.up_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.up_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.mlp.up_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + 
"model.layers.52.self_attn.k_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.k_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.k_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.k_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.o_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.o_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.o_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.o_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.q_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.q_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.q_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.q_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.v_proj.g_idx": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.v_proj.qweight": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.v_proj.qzeros": "model-00003-of-00004.safetensors", + "model.layers.52.self_attn.v_proj.scales": "model-00003-of-00004.safetensors", + "model.layers.53.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", 
+ "model.layers.53.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.53.self_attn.v_proj.qzeros": 
"model-00004-of-00004.safetensors", + "model.layers.53.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.q_proj.bias": 
"model-00004-of-00004.safetensors", + "model.layers.54.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.54.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.k_proj.g_idx": 
"model-00004-of-00004.safetensors", + "model.layers.55.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.55.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.gate_proj.qzeros": 
"model-00004-of-00004.safetensors", + "model.layers.56.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.56.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.56.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + 
"model.layers.56.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + 
"model.layers.57.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.57.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + 
"model.layers.58.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.58.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + 
"model.layers.59.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.59.self_attn.v_proj.scales": 
"model-00004-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + 
"model.layers.6.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.60.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + 
"model.layers.60.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.60.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + 
"model.layers.61.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.61.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.input_layernorm.weight": 
"model-00004-of-00004.safetensors", + "model.layers.62.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.k_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.q_proj.qweight": 
"model-00004-of-00004.safetensors", + "model.layers.62.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.62.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.down_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.down_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.down_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.down_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.gate_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.gate_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.gate_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.gate_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.up_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.up_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.up_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.mlp.up_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.k_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.k_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.k_proj.qzeros": 
"model-00004-of-00004.safetensors", + "model.layers.63.self_attn.k_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.o_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.o_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.o_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.o_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.q_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.q_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.q_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.q_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.v_proj.g_idx": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.v_proj.qweight": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.v_proj.qzeros": "model-00004-of-00004.safetensors", + "model.layers.63.self_attn.v_proj.scales": "model-00004-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.g_idx": 
"model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.g_idx": 
"model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", 
+ "model.layers.8.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.scales": "model-00001-of-00004.safetensors", + 
"model.layers.9.self_attn.o_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.scales": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.g_idx": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.qweight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.qzeros": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.scales": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/quantize_config.json b/quantize_config.json new file mode 100644 index 0000000..25bb34d --- /dev/null +++ b/quantize_config.json @@ -0,0 +1,12 @@ +{ + "bits": 4, + "group_size": 128, + "sym": true, + "data_type": "int", + "nsamples": 512, + "autoround_version": "0.5.1", + "quant_method": "gptq", + "desc_act": false, + "true_sequential": false, + "damp_percent": 0.01 +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + 
"<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ecbd114 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - 
loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in message.content %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or 
(messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if thinking_mode is defined %}\n {%- if thinking_mode == \"on\" %}\n {{- '\\n' }}\n {%- elif thinking_mode == \"off\" %}\n {{- '\\n\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833