commit 1ed1822c1bd101842d6aae1b55a9854a9ed58026 Author: ModelHub XC Date: Sat Apr 11 06:39:00 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: stepfun-ai/StepFun-Prover-Preview-7B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..21b3632 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs 
diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..56ee3c8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d4725c8 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +--- +language: +- en +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B +tags: +- chat +library_name: transformers +license: apache-2.0 +--- + +# StepFun-Prover-Preview-7B + +**StepFun-Prover-Preview-7B** is a theorem proving model developed by StepFun Team. It can iteratively refine the proof sketch by interacting with Lean 4, and achieves 66.0% Pass@1 accuracy on MiniF2F-test. Advanced usage examples can be found on [GitHub](https://github.com/stepfun-ai/StepFun-Prover-Preview). + +# Quick Start with vLLM + +```python +from vllm import LLM, SamplingParams +from transformers import AutoTokenizer + +model_name = "stepfun-ai/StepFun-Prover-Preview-7B" +model = LLM( + model=model_name, + tensor_parallel_size=4, + ) +tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + +formal_problem = """ +import Mathlib + +theorem test_theorem (x y z : ℝ) (hx : 0 < x) (hy : 0 < y) (hz : 0 < z) : + (x^2 - z^2) / (y + z) + (y^2 - x^2) / (z + x) + (z^2 - y^2) / (x + y) ≥ 0 := by +""".strip() + +system_prompt = "You will be given an unsolved Lean 4 problem. Think carefully and work towards a solution. At any point, you may use the Lean 4 REPL to check your progress by enclosing your partial solution between <sketch> and </sketch>. 
The REPL feedback will be provided between <REPL> and </REPL>. Continue this process as needed until you arrive at a complete and correct solution." + +user_prompt = f"```lean4\n{formal_problem}\n```" + +dialog = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} +] + +prompt = tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=True) + +sampling_params = SamplingParams( + temperature=0.999, + top_p=0.95, + top_k=-1, + max_tokens=16384, + stop_token_ids=[151643, 151666], # <|end▁of▁sentence|>, </sketch> + include_stop_str_in_output=True, +) + +output = model.generate(prompt, sampling_params=sampling_params) +output_text = output[0].outputs[0].text +print(output_text) +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..c2066bd --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + 
'<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..2a8a473 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "max_position_embeddings": 131072, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.44.0", + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ 
+{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generate_config.json b/generate_config.json new file mode 100644 index 0000000..3fd992e --- /dev/null +++ b/generate_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "eos_token_id": 151643, + "do_sample": true, + "temperature": 0.7, + "top_p": 0.95, + "transformers_version": "4.54.0" +} \ No newline at end of file diff --git a/model-00001.safetensors b/model-00001.safetensors new file mode 100644 index 0000000..cce65c9 --- /dev/null +++ b/model-00001.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3aeb47ceecf799a275ab4ef703dff6f93c32b6cc76d8c4456c6af072b850c84 +size 4253782776 diff --git a/model-00002.safetensors b/model-00002.safetensors new file mode 100644 index 0000000..ddd7014 --- /dev/null +++ b/model-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad13314f7acb45e80dd89524f0e655f7207cdff6db6c84f8bf821af0eb4c9ab4 +size 4195052616 diff --git a/model-00003.safetensors b/model-00003.safetensors new file mode 100644 index 0000000..afb8e37 --- /dev/null +++ b/model-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b95d237b2a1c92d8d7fe34e0f6183137f40dd572ba3c97f58f0d70897bcf94 +size 4195052632 diff --git a/model-00004.safetensors b/model-00004.safetensors new file mode 100644 index 0000000..b8fd011 --- /dev/null +++ b/model-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4406405f063a122734d75e39689906626b3600152477e2ca182f99b137d7d88 +size 2587383656 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..5fd42a7 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,344 @@ +{ + "metadata": {}, + "weight_map": { + "model.layers.0.self_attn.q_proj.weight": "model-00001.safetensors", + 
"model.layers.0.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001.safetensors", + 
"model.layers.2.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.4.mlp.down_proj.weight": 
"model-00001.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.7.self_attn.k_proj.weight": 
"model-00001.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001.safetensors", + 
"model.layers.9.self_attn.o_proj.weight": "model-00001.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002.safetensors", + 
"model.layers.11.input_layernorm.weight": "model-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": 
"model-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": 
"model-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": 
"model-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003.safetensors", + 
"model.layers.21.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00003.safetensors", + 
"model.layers.23.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003.safetensors", + 
"model.layers.25.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00003.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00003.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00004.safetensors", + "model.embed_tokens.weight": "model-00004.safetensors", + 
"model.norm.weight": "model-00004.safetensors", + "lm_head.weight": "model-00004.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..1d385d6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..ef164b6 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eedbf07704361ce2fa255b6fa6d891bb8455d813cd74d749de0b4497b4702c5 +size 11423149 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..d01d6b3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,210 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + 
"content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 163840, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +}