初始化项目,由ModelHub XC社区提供模型
Model: AI-ModelScope/internlm3-8b-instruct Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
201
LICENSE.txt
Normal file
201
LICENSE.txt
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright 2023-2024 Shanghai AI Laboratory
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
889
README.md
Normal file
889
README.md
Normal file
@@ -0,0 +1,889 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
---
|
||||||
|
# InternLM
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
<img src="https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e" width="200"/>
|
||||||
|
|
||||||
|
<div> </div>
|
||||||
|
<div align="center">
|
||||||
|
<b><font size="5">InternLM</font></b>
|
||||||
|
<sup>
|
||||||
|
<a href="https://internlm.intern-ai.org.cn/">
|
||||||
|
<i><font size="4">HOT</font></i>
|
||||||
|
</a>
|
||||||
|
</sup>
|
||||||
|
<div> </div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
[](https://github.com/internLM/OpenCompass/)
|
||||||
|
|
||||||
|
[💻Github Repo](https://github.com/InternLM/InternLM) • [🤗Demo](https://huggingface.co/spaces/internlm/internlm3-8b-instruct) • [🤔Reporting Issues](https://github.com/InternLM/InternLM/issues/new) • [📜Technical Report](https://arxiv.org/abs/2403.17297)
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
👋 join us on <a href="https://discord.gg/xa29JuW87d" target="_blank">Discord</a> and <a href="https://github.com/InternLM/InternLM/assets/25839884/a6aad896-7232-4220-ac84-9e070c2633ce" target="_blank">WeChat</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. This model has the following characteristics:
|
||||||
|
|
||||||
|
- **Enhanced performance at reduced cost**:
|
||||||
|
State-of-the-art performance on reasoning and knowledge-intensive tasks surpass models like Llama3.1-8B and Qwen2.5-7B. Remarkably, InternLM3 is trained on only 4 trillion high-quality tokens, saving more than 75% of the training cost compared to other LLMs of similar scale.
|
||||||
|
- **Deep thinking capability**:
|
||||||
|
InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions.
|
||||||
|
|
||||||
|
## InternLM3-8B-Instruct
|
||||||
|
|
||||||
|
### Performance Evaluation
|
||||||
|
|
||||||
|
We conducted a comprehensive evaluation of InternLM using the open-source evaluation tool [OpenCompass](https://github.com/internLM/OpenCompass/). The evaluation covered five dimensions of capabilities: disciplinary competence, language competence, knowledge competence, inference competence, and comprehension competence. Here are some of the evaluation results, and you can visit the [OpenCompass leaderboard](https://rank.opencompass.org.cn) for more evaluation results.
|
||||||
|
|
||||||
|
| | Benchmark | InternLM3-8B-Instruct | Qwen2.5-7B-Instruct | Llama3.1-8B-Instruct | GPT-4o-mini(closed source) |
|
||||||
|
| ------------ | ------------------------------- | --------------------- | ------------------- | -------------------- | -------------------------- |
|
||||||
|
| General | CMMLU(0-shot) | **83.1** | 75.8 | 53.9 | 66.0 |
|
||||||
|
| | MMLU(0-shot) | 76.6 | **76.8** | 71.8 | 82.7 |
|
||||||
|
| | MMLU-Pro(0-shot) | **57.6** | 56.2 | 48.1 | 64.1 |
|
||||||
|
| Reasoning | GPQA-Diamond(0-shot) | **37.4** | 33.3 | 24.2 | 42.9 |
|
||||||
|
| | DROP(0-shot) | **83.1** | 80.4 | 81.6 | 85.2 |
|
||||||
|
| | HellaSwag(10-shot) | **91.2** | 85.3 | 76.7 | 89.5 |
|
||||||
|
| | KOR-Bench(0-shot) | **56.4** | 44.6 | 47.7 | 58.2 |
|
||||||
|
| MATH | MATH-500(0-shot) | **83.0*** | 72.4 | 48.4 | 74.0 |
|
||||||
|
| | AIME2024(0-shot) | **20.0*** | 16.7 | 6.7 | 13.3 |
|
||||||
|
| Coding | LiveCodeBench(2407-2409 Pass@1) | **17.8** | 16.8 | 12.9 | 21.8 |
|
||||||
|
| | HumanEval(Pass@1) | 82.3 | **85.4** | 72.0 | 86.6 |
|
||||||
|
| Instrunction | IFEval(Prompt-Strict) | **79.3** | 71.7 | 75.2 | 79.7 |
|
||||||
|
| Long Context | RULER(4-128K Average) | 87.9 | 81.4 | **88.5** | 90.7 |
|
||||||
|
| Chat | AlpacaEval 2.0(LC WinRate) | **51.1** | 30.3 | 25.0 | 50.7 |
|
||||||
|
| | WildBench(Raw Score) | **33.1** | 23.3 | 1.5 | 40.3 |
|
||||||
|
| | MT-Bench-101(Score 1-10) | **8.59** | 8.49 | 8.37 | 8.87 |
|
||||||
|
|
||||||
|
- Values marked in bold indicate the **highest** in open source models
|
||||||
|
- The evaluation results were obtained from [OpenCompass](https://github.com/internLM/OpenCompass/) (some data marked with *, which means evaluating with Thinking Mode), and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/internLM/OpenCompass/).
|
||||||
|
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/internLM/OpenCompass/), so please refer to the latest evaluation results of [OpenCompass](https://github.com/internLM/OpenCompass/).
|
||||||
|
|
||||||
|
**Limitations:** Although we have made efforts to ensure the safety of the model during the training process and to encourage the model to generate text that complies with ethical and legal requirements, the model may still produce unexpected outputs due to its size and probabilistic generation paradigm. For example, the generated responses may contain biases, discrimination, or other harmful content. Please do not propagate such content. We are not responsible for any consequences resulting from the dissemination of harmful information.
|
||||||
|
### Requirements
|
||||||
|
```python
|
||||||
|
transformers >= 4.48
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Conversation Mode
|
||||||
|
|
||||||
|
#### Transformers inference
|
||||||
|
|
||||||
|
To load the InternLM3 8B Instruct model using Transformers, use the following code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
||||||
|
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
|
||||||
|
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
|
||||||
|
# InternLM3 8B in 4bit will cost nearly 8GB GPU memory.
|
||||||
|
# pip install -U bitsandbytes
|
||||||
|
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
|
||||||
|
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
|
||||||
|
model = model.eval()
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": "Please tell me five scenic spots in Shanghai"},
|
||||||
|
]
|
||||||
|
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||||
|
|
||||||
|
generated_ids = model.generate(tokenized_chat, max_new_tokens=1024, temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8)
|
||||||
|
|
||||||
|
generated_ids = [
|
||||||
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(tokenized_chat, generated_ids)
|
||||||
|
]
|
||||||
|
prompt = tokenizer.batch_decode(tokenized_chat)[0]
|
||||||
|
print(prompt)
|
||||||
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### LMDeploy inference
|
||||||
|
LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by the MMRazor and MMDeploy teams.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install lmdeploy
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run batch inference locally with the following python code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import lmdeploy
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
pipe = lmdeploy.pipeline(model_dir)
|
||||||
|
response = pipe("Please tell me five scenic spots in Shanghai")
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Or you can launch an OpenAI compatible server with the following command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
lmdeploy serve api_server internlm/internlm3-8b-instruct --model-name internlm3-8b-instruct --server-port 23333
|
||||||
|
```
|
||||||
|
|
||||||
|
Then you can send a chat request to the server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:23333/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "internlm3-8b-instruct",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Please tell me five scenic spots in Shanghai"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Find more details in the [LMDeploy documentation](https://lmdeploy.readthedocs.io/en/latest/)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### Ollama inference
|
||||||
|
|
||||||
|
First install ollama,
|
||||||
|
|
||||||
|
```python
|
||||||
|
# install ollama
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
# fetch model
|
||||||
|
ollama pull internlm/internlm3-8b-instruct
|
||||||
|
# install
|
||||||
|
pip install ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
inference code,
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ollama
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Please tell me five scenic spots in Shanghai"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
stream = ollama.chat(
|
||||||
|
model='internlm/internlm3-8b-instruct',
|
||||||
|
messages=messages,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in stream:
|
||||||
|
print(chunk['message']['content'], end='', flush=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### vLLM inference
|
||||||
|
|
||||||
|
Refer to [installation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) to install the latest code of vllm
|
||||||
|
|
||||||
|
```python
|
||||||
|
pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
|
```
|
||||||
|
|
||||||
|
inference code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
llm = LLM(model="internlm/internlm3-8b-instruct")
|
||||||
|
sampling_params = SamplingParams(temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8)
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
|
||||||
|
prompts = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Please tell me five scenic spots in Shanghai"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
outputs = llm.chat(prompts,
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
use_tqdm=False)
|
||||||
|
print(outputs)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Thinking Mode
|
||||||
|
#### Thinking Demo
|
||||||
|
|
||||||
|
<img src="https://github.com/InternLM/InternLM/blob/017ba7446d20ecc3b9ab8e7b66cc034500868ab4/assets/solve_puzzle.png?raw=true" width="400"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### Thinking system prompt
|
||||||
|
```python
|
||||||
|
thinking_system_prompt = """You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
|
||||||
|
## Deep Understanding
|
||||||
|
Take time to fully comprehend the problem before attempting a solution. Consider:
|
||||||
|
- What is the real question being asked?
|
||||||
|
- What are the given conditions and what do they tell us?
|
||||||
|
- Are there any special restrictions or assumptions?
|
||||||
|
- Which information is crucial and which is supplementary?
|
||||||
|
## Multi-angle Analysis
|
||||||
|
Before solving, conduct thorough analysis:
|
||||||
|
- What mathematical concepts and properties are involved?
|
||||||
|
- Can you recall similar classic problems or solution methods?
|
||||||
|
- Would diagrams or tables help visualize the problem?
|
||||||
|
- Are there special cases that need separate consideration?
|
||||||
|
## Systematic Thinking
|
||||||
|
Plan your solution path:
|
||||||
|
- Propose multiple possible approaches
|
||||||
|
- Analyze the feasibility and merits of each method
|
||||||
|
- Choose the most appropriate method and explain why
|
||||||
|
- Break complex problems into smaller, manageable steps
|
||||||
|
## Rigorous Proof
|
||||||
|
During the solution process:
|
||||||
|
- Provide solid justification for each step
|
||||||
|
- Include detailed proofs for key conclusions
|
||||||
|
- Pay attention to logical connections
|
||||||
|
- Be vigilant about potential oversights
|
||||||
|
## Repeated Verification
|
||||||
|
After completing your solution:
|
||||||
|
- Verify your results satisfy all conditions
|
||||||
|
- Check for overlooked special cases
|
||||||
|
- Consider if the solution can be optimized or simplified
|
||||||
|
- Review your reasoning process
|
||||||
|
Remember:
|
||||||
|
1. Take time to think thoroughly rather than rushing to an answer
|
||||||
|
2. Rigorously prove each key conclusion
|
||||||
|
3. Keep an open mind and try different approaches
|
||||||
|
4. Summarize valuable problem-solving methods
|
||||||
|
5. Maintain healthy skepticism and verify multiple times
|
||||||
|
Your response should reflect deep mathematical understanding and precise logical thinking, making your solution path and reasoning clear to others.
|
||||||
|
When you're ready, present your complete solution with:
|
||||||
|
- Clear problem understanding
|
||||||
|
- Detailed solution process
|
||||||
|
- Key insights
|
||||||
|
- Thorough verification
|
||||||
|
Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
#### Transformers inference
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
||||||
|
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
|
||||||
|
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
|
||||||
|
# InternLM3 8B in 4bit will cost nearly 8GB GPU memory.
|
||||||
|
# pip install -U bitsandbytes
|
||||||
|
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
|
||||||
|
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
|
||||||
|
model = model.eval()
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": thinking_system_prompt},
|
||||||
|
{"role": "user", "content": "Given the function\(f(x)=\mathrm{e}^{x}-ax - a^{3}\),\n(1) When \(a = 1\), find the equation of the tangent line to the curve \(y = f(x)\) at the point \((1,f(1))\).\n(2) If \(f(x)\) has a local minimum and the minimum value is less than \(0\), determine the range of values for \(a\)."},
|
||||||
|
]
|
||||||
|
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||||
|
|
||||||
|
generated_ids = model.generate(tokenized_chat, max_new_tokens=8192)
|
||||||
|
|
||||||
|
generated_ids = [
|
||||||
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(tokenized_chat, generated_ids)
|
||||||
|
]
|
||||||
|
prompt = tokenizer.batch_decode(tokenized_chat)[0]
|
||||||
|
print(prompt)
|
||||||
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
#### LMDeploy inference
|
||||||
|
|
||||||
|
LMDeploy is a toolkit for compressing, deploying, and serving LLM.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install lmdeploy
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run batch inference locally with the following python code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from lmdeploy import pipeline, GenerationConfig, ChatTemplateConfig
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
chat_template_config = ChatTemplateConfig(model_name='internlm3')
|
||||||
|
pipe = pipeline(model_dir, chat_template_config=chat_template_config)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": thinking_system_prompt},
|
||||||
|
{"role": "user", "content": "Given the function\(f(x)=\mathrm{e}^{x}-ax - a^{3}\),\n(1) When \(a = 1\), find the equation of the tangent line to the curve \(y = f(x)\) at the point \((1,f(1))\).\n(2) If \(f(x)\) has a local minimum and the minimum value is less than \(0\), determine the range of values for \(a\)."},
|
||||||
|
]
|
||||||
|
|
||||||
|
response = pipe(messages, gen_config=GenerationConfig(max_new_tokens=2048))
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Ollama inference
|
||||||
|
|
||||||
|
First install ollama,
|
||||||
|
|
||||||
|
```python
|
||||||
|
# install ollama
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
# fetch model
|
||||||
|
ollama pull internlm/internlm3-8b-instruct
|
||||||
|
# install
|
||||||
|
pip install ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
inference code,
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ollama
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": thinking_system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Given the function\(f(x)=\mathrm{e}^{x}-ax - a^{3}\),\n(1) When \(a = 1\), find the equation of the tangent line to the curve \(y = f(x)\) at the point \((1,f(1))\).\n(2) If \(f(x)\) has a local minimum and the minimum value is less than \(0\), determine the range of values for \(a\)."
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
stream = ollama.chat(
|
||||||
|
model='internlm/internlm3-8b-instruct',
|
||||||
|
messages=messages,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in stream:
|
||||||
|
print(chunk['message']['content'], end='', flush=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
|
#### vLLM inference
|
||||||
|
|
||||||
|
Refer to [installation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) to install the latest code of vllm
|
||||||
|
|
||||||
|
```python
|
||||||
|
pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
|
```
|
||||||
|
|
||||||
|
inference code
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
llm = LLM(model="internlm/internlm3-8b-instruct")
|
||||||
|
sampling_params = SamplingParams(temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8, max_tokens=8192)
|
||||||
|
|
||||||
|
prompts = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": thinking_system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Given the function\(f(x)=\mathrm{e}^{x}-ax - a^{3}\),\n(1) When \(a = 1\), find the equation of the tangent line to the curve \(y = f(x)\) at the point \((1,f(1))\).\n(2) If \(f(x)\) has a local minimum and the minimum value is less than \(0\), determine the range of values for \(a\)."
|
||||||
|
},
|
||||||
|
]
|
||||||
|
outputs = llm.chat(prompts,
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
use_tqdm=False)
|
||||||
|
print(outputs)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Open Source License
|
||||||
|
|
||||||
|
Code and model weights are licensed under Apache-2.0.
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
|
||||||
|
```
|
||||||
|
@misc{cai2024internlm2,
|
||||||
|
title={InternLM2 Technical Report},
|
||||||
|
author={Zheng Cai and Maosong Cao and Haojiong Chen and Kai Chen and Keyu Chen and Xin Chen and Xun Chen and Zehui Chen and Zhi Chen and Pei Chu and Xiaoyi Dong and Haodong Duan and Qi Fan and Zhaoye Fei and Yang Gao and Jiaye Ge and Chenya Gu and Yuzhe Gu and Tao Gui and Aijia Guo and Qipeng Guo and Conghui He and Yingfan Hu and Ting Huang and Tao Jiang and Penglong Jiao and Zhenjiang Jin and Zhikai Lei and Jiaxing Li and Jingwen Li and Linyang Li and Shuaibin Li and Wei Li and Yining Li and Hongwei Liu and Jiangning Liu and Jiawei Hong and Kaiwen Liu and Kuikun Liu and Xiaoran Liu and Chengqi Lv and Haijun Lv and Kai Lv and Li Ma and Runyuan Ma and Zerun Ma and Wenchang Ning and Linke Ouyang and Jiantao Qiu and Yuan Qu and Fukai Shang and Yunfan Shao and Demin Song and Zifan Song and Zhihao Sui and Peng Sun and Yu Sun and Huanze Tang and Bin Wang and Guoteng Wang and Jiaqi Wang and Jiayu Wang and Rui Wang and Yudong Wang and Ziyi Wang and Xingjian Wei and Qizhen Weng and Fan Wu and Yingtong Xiong and Chao Xu and Ruiliang Xu and Hang Yan and Yirong Yan and Xiaogui Yang and Haochen Ye and Huaiyuan Ying and Jia Yu and Jing Yu and Yuhang Zang and Chuyu Zhang and Li Zhang and Pan Zhang and Peng Zhang and Ruijie Zhang and Shuo Zhang and Songyang Zhang and Wenjian Zhang and Wenwei Zhang and Xingcheng Zhang and Xinyue Zhang and Hui Zhao and Qian Zhao and Xiaomeng Zhao and Fengzhe Zhou and Zaida Zhou and Jingming Zhuo and Yicheng Zou and Xipeng Qiu and Yu Qiao and Dahua Lin},
|
||||||
|
year={2024},
|
||||||
|
eprint={2403.17297},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CL}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 简介
|
||||||
|
|
||||||
|
### InternLM3-8B-Instruct
|
||||||
|
|
||||||
|
InternLM3,即书生·浦语大模型第3代,开源了80亿参数,面向通用使用与高阶推理的指令模型(InternLM3-8B-Instruct)。模型具备以下特点:
|
||||||
|
|
||||||
|
- **更低的代价取得更高的性能**:
|
||||||
|
在推理、知识类任务上取得同量级最优性能,超过Llama3.1-8B和Qwen2.5-7B。值得关注的是InternLM3只用了4万亿词元进行训练,对比同级别模型训练成本节省75%以上。
|
||||||
|
- **深度思考能力**:
|
||||||
|
InternLM3支持通过长思维链求解复杂推理任务的深度思考模式,同时还兼顾了用户体验更流畅的通用回复模式。
|
||||||
|
|
||||||
|
#### 性能评测
|
||||||
|
|
||||||
|
我们使用开源评测工具 [OpenCompass](https://github.com/internLM/OpenCompass/) 从学科综合能力、语言能力、知识能力、推理能力、理解能力五大能力维度对InternLM开展全面评测,部分评测结果如下表所示,欢迎访问[ OpenCompass 榜单 ](https://rank.opencompass.org.cn)获取更多的评测结果。
|
||||||
|
|
||||||
|
| | 评测集\模型 | InternLM3-8B-Instruct | Qwen2.5-7B-Instruct | Llama3.1-8B-Instruct | GPT-4o-mini(闭源) |
|
||||||
|
| ------------ | ------------------------------- | --------------------- | ------------------- | -------------------- | ----------------- |
|
||||||
|
| General | CMMLU(0-shot) | **83.1** | 75.8 | 53.9 | 66.0 |
|
||||||
|
| | MMLU(0-shot) | 76.6 | **76.8** | 71.8 | 82.7 |
|
||||||
|
| | MMLU-Pro(0-shot) | **57.6** | 56.2 | 48.1 | 64.1 |
|
||||||
|
| Reasoning | GPQA-Diamond(0-shot) | **37.4** | 33.3 | 24.2 | 42.9 |
|
||||||
|
| | DROP(0-shot) | **83.1** | 80.4 | 81.6 | 85.2 |
|
||||||
|
| | HellaSwag(10-shot) | **91.2** | 85.3 | 76.7 | 89.5 |
|
||||||
|
| | KOR-Bench(0-shot) | **56.4** | 44.6 | 47.7 | 58.2 |
|
||||||
|
| MATH | MATH-500(0-shot) | **83.0*** | 72.4 | 48.4 | 74.0 |
|
||||||
|
| | AIME2024(0-shot) | **20.0*** | 16.7 | 6.7 | 13.3 |
|
||||||
|
| Coding | LiveCodeBench(2407-2409 Pass@1) | **17.8** | 16.8 | 12.9 | 21.8 |
|
||||||
|
| | HumanEval(Pass@1) | 82.3 | **85.4** | 72.0 | 86.6 |
|
||||||
|
| Instrunction | IFEval(Prompt-Strict) | **79.3** | 71.7 | 75.2 | 79.7 |
|
||||||
|
| LongContext | RULER(4-128K Average) | 87.9 | 81.4 | **88.5** | 90.7 |
|
||||||
|
| Chat | AlpacaEval 2.0(LC WinRate) | **51.1** | 30.3 | 25.0 | 50.7 |
|
||||||
|
| | WildBench(Raw Score) | **33.1** | 23.3 | 1.5 | 40.3 |
|
||||||
|
| | MT-Bench-101(Score 1-10) | **8.59** | 8.49 | 8.37 | 8.87 |
|
||||||
|
|
||||||
|
- 表中标粗的数值表示在对比的开源模型中的最高值。
|
||||||
|
- 以上评测结果基于 [OpenCompass](https://github.com/internLM/OpenCompass/) 获得(部分数据标注`*`代表使用深度思考模式进行评测),具体测试细节可参见 [OpenCompass](https://github.com/internLM/OpenCompass/) 中提供的配置文件。
|
||||||
|
- 评测数据会因 [OpenCompass](https://github.com/internLM/OpenCompass/) 的版本迭代而存在数值差异,请以 [OpenCompass](https://github.com/internLM/OpenCompass/) 最新版的评测结果为主。
|
||||||
|
|
||||||
|
**局限性:** 尽管在训练过程中我们非常注重模型的安全性,尽力促使模型输出符合伦理和法律要求的文本,但受限于模型大小以及概率生成范式,模型可能会产生各种不符合预期的输出,例如回复内容包含偏见、歧视等有害内容,请勿传播这些内容。由于传播不良信息导致的任何后果,本项目不承担责任。
|
||||||
|
|
||||||
|
#### 依赖
|
||||||
|
|
||||||
|
```python
|
||||||
|
transformers >= 4.48
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### 常规对话模式
|
||||||
|
|
||||||
|
##### Transformers 推理
|
||||||
|
|
||||||
|
通过以下的代码加载 InternLM3 8B Instruct 模型
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
||||||
|
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
|
||||||
|
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
|
||||||
|
# InternLM3 8B in 4bit will cost nearly 8GB GPU memory.
|
||||||
|
# pip install -U bitsandbytes
|
||||||
|
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
|
||||||
|
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
|
||||||
|
model = model.eval()
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": "Please tell me five scenic spots in Shanghai"},
|
||||||
|
]
|
||||||
|
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||||
|
|
||||||
|
generated_ids = model.generate(tokenized_chat, max_new_tokens=1024, temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8)
|
||||||
|
|
||||||
|
generated_ids = [
|
||||||
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(tokenized_chat, generated_ids)
|
||||||
|
]
|
||||||
|
prompt = tokenizer.batch_decode(tokenized_chat)[0]
|
||||||
|
print(prompt)
|
||||||
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
##### LMDeploy 推理
|
||||||
|
|
||||||
|
LMDeploy 是涵盖了 LLM 任务的全套轻量化、部署和服务解决方案。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install lmdeploy
|
||||||
|
```
|
||||||
|
|
||||||
|
你可以使用以下 python 代码进行本地批量推理:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import lmdeploy
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
pipe = lmdeploy.pipeline(model_dir)
|
||||||
|
response = pipe(["Please tell me five scenic spots in Shanghai"])
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
或者你可以使用以下命令启动兼容 OpenAI API 的服务:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
lmdeploy serve api_server internlm/internlm3-8b-instruct --model-name internlm3-8b-instruct --server-port 23333
|
||||||
|
```
|
||||||
|
|
||||||
|
然后你可以向服务端发起一个聊天请求:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:23333/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "internlm3-8b-instruct",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "介绍一下深度学习。"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
更多信息请查看 [LMDeploy 文档](https://lmdeploy.readthedocs.io/en/latest/)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##### Ollama 推理
|
||||||
|
|
||||||
|
准备工作
|
||||||
|
|
||||||
|
```python
|
||||||
|
# install ollama
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
# fetch 模型
|
||||||
|
ollama pull internlm/internlm3-8b-instruct
|
||||||
|
# install python库
|
||||||
|
pip install ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
推理代码
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ollama
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Please tell me five scenic spots in Shanghai"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
stream = ollama.chat(
|
||||||
|
model='internlm/internlm3-8b-instruct',
|
||||||
|
messages=messages,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in stream:
|
||||||
|
print(chunk['message']['content'], end='', flush=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
|
##### vLLM 推理
|
||||||
|
|
||||||
|
参考[文档](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) 安装 vllm 最新代码
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
|
```
|
||||||
|
|
||||||
|
推理代码
|
||||||
|
|
||||||
|
```python
|
||||||
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
llm = LLM(model="internlm/internlm3-8b-instruct")
|
||||||
|
sampling_params = SamplingParams(temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8)
|
||||||
|
|
||||||
|
system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
|
||||||
|
- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
|
||||||
|
- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
|
||||||
|
|
||||||
|
prompts = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Please tell me five scenic spots in Shanghai"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
outputs = llm.chat(prompts,
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
use_tqdm=False)
|
||||||
|
print(outputs)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 深度思考模式
|
||||||
|
|
||||||
|
##### 深度思考 Demo
|
||||||
|
|
||||||
|
<img src="https://github.com/InternLM/InternLM/blob/017ba7446d20ecc3b9ab8e7b66cc034500868ab4/assets/solve_puzzle.png?raw=true" width="400"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##### 深度思考 system prompt
|
||||||
|
|
||||||
|
```python
|
||||||
|
thinking_system_prompt = """You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
|
||||||
|
## Deep Understanding
|
||||||
|
Take time to fully comprehend the problem before attempting a solution. Consider:
|
||||||
|
- What is the real question being asked?
|
||||||
|
- What are the given conditions and what do they tell us?
|
||||||
|
- Are there any special restrictions or assumptions?
|
||||||
|
- Which information is crucial and which is supplementary?
|
||||||
|
## Multi-angle Analysis
|
||||||
|
Before solving, conduct thorough analysis:
|
||||||
|
- What mathematical concepts and properties are involved?
|
||||||
|
- Can you recall similar classic problems or solution methods?
|
||||||
|
- Would diagrams or tables help visualize the problem?
|
||||||
|
- Are there special cases that need separate consideration?
|
||||||
|
## Systematic Thinking
|
||||||
|
Plan your solution path:
|
||||||
|
- Propose multiple possible approaches
|
||||||
|
- Analyze the feasibility and merits of each method
|
||||||
|
- Choose the most appropriate method and explain why
|
||||||
|
- Break complex problems into smaller, manageable steps
|
||||||
|
## Rigorous Proof
|
||||||
|
During the solution process:
|
||||||
|
- Provide solid justification for each step
|
||||||
|
- Include detailed proofs for key conclusions
|
||||||
|
- Pay attention to logical connections
|
||||||
|
- Be vigilant about potential oversights
|
||||||
|
## Repeated Verification
|
||||||
|
After completing your solution:
|
||||||
|
- Verify your results satisfy all conditions
|
||||||
|
- Check for overlooked special cases
|
||||||
|
- Consider if the solution can be optimized or simplified
|
||||||
|
- Review your reasoning process
|
||||||
|
Remember:
|
||||||
|
1. Take time to think thoroughly rather than rushing to an answer
|
||||||
|
2. Rigorously prove each key conclusion
|
||||||
|
3. Keep an open mind and try different approaches
|
||||||
|
4. Summarize valuable problem-solving methods
|
||||||
|
5. Maintain healthy skepticism and verify multiple times
|
||||||
|
Your response should reflect deep mathematical understanding and precise logical thinking, making your solution path and reasoning clear to others.
|
||||||
|
When you're ready, present your complete solution with:
|
||||||
|
- Clear problem understanding
|
||||||
|
- Detailed solution process
|
||||||
|
- Key insights
|
||||||
|
- Thorough verification
|
||||||
|
Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
|
||||||
|
"""
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Transformers 推理
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
|
||||||
|
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
|
||||||
|
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
|
||||||
|
# InternLM3 8B in 4bit will cost nearly 8GB GPU memory.
|
||||||
|
# pip install -U bitsandbytes
|
||||||
|
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
|
||||||
|
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
|
||||||
|
model = model.eval()
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": thinking_system_prompt},
|
||||||
|
{"role": "user", "content": "已知函数\(f(x)=\mathrm{e}^{x}-ax - a^{3}\)。\n(1)当\(a = 1\)时,求曲线\(y = f(x)\)在点\((1,f(1))\)处的切线方程;\n(2)若\(f(x)\)有极小值,且极小值小于\(0\),求\(a\)的取值范围。"},
|
||||||
|
]
|
||||||
|
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||||
|
|
||||||
|
generated_ids = model.generate(tokenized_chat, max_new_tokens=8192)
|
||||||
|
|
||||||
|
generated_ids = [
|
||||||
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(tokenized_chat, generated_ids)
|
||||||
|
]
|
||||||
|
prompt = tokenizer.batch_decode(tokenized_chat)[0]
|
||||||
|
print(prompt)
|
||||||
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
##### LMDeploy 推理
|
||||||
|
|
||||||
|
LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by the MMRazor and MMDeploy teams.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install lmdeploy
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run batch inference locally with the following python code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from lmdeploy import pipeline, GenerationConfig, ChatTemplateConfig
|
||||||
|
model_dir = "internlm/internlm3-8b-instruct"
|
||||||
|
chat_template_config = ChatTemplateConfig(model_name='internlm3')
|
||||||
|
pipe = pipeline(model_dir, chat_template_config=chat_template_config)
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": thinking_system_prompt},
|
||||||
|
{"role": "user", "content": "已知函数\(f(x)=\mathrm{e}^{x}-ax - a^{3}\)。\n(1)当\(a = 1\)时,求曲线\(y = f(x)\)在点\((1,f(1))\)处的切线方程;\n(2)若\(f(x)\)有极小值,且极小值小于\(0\),求\(a\)的取值范围。"},
|
||||||
|
]
|
||||||
|
|
||||||
|
response = pipe(messages, gen_config=GenerationConfig(max_new_tokens=2048))
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Ollama 推理
|
||||||
|
|
||||||
|
准备工作
|
||||||
|
|
||||||
|
```python
|
||||||
|
# install ollama
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh
|
||||||
|
# fetch 模型
|
||||||
|
ollama pull internlm/internlm3-8b-instruct
|
||||||
|
# install python库
|
||||||
|
pip install ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
inference code,
|
||||||
|
|
||||||
|
```python
|
||||||
|
import ollama
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": thinking_system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Given the function\(f(x)=\mathrm{e}^{x}-ax - a^{3}\),\n(1) When \(a = 1\), find the equation of the tangent line to the curve \(y = f(x)\) at the point \((1,f(1))\).\n(2) If \(f(x)\) has a local minimum and the minimum value is less than \(0\), determine the range of values for \(a\)."
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
stream = ollama.chat(
|
||||||
|
model='internlm/internlm3-8b-instruct',
|
||||||
|
messages=messages,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in stream:
|
||||||
|
print(chunk['message']['content'], end='', flush=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
|
##### vLLM 推理
|
||||||
|
|
||||||
|
参考[文档](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) 安装 vllm 最新代码
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
|
```
|
||||||
|
|
||||||
|
推理代码
|
||||||
|
|
||||||
|
```python
|
||||||
|
from vllm import LLM, SamplingParams
|
||||||
|
|
||||||
|
llm = LLM(model="internlm/internlm3-8b-instruct")
|
||||||
|
sampling_params = SamplingParams(temperature=1, repetition_penalty=1.005, top_k=40, top_p=0.8, max_tokens=8192)
|
||||||
|
|
||||||
|
prompts = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": thinking_system_prompt,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "已知函数\(f(x)=\mathrm{e}^{x}-ax - a^{3}\)。\n(1)当\(a = 1\)时,求曲线\(y = f(x)\)在点\((1,f(1))\)处的切线方程;\n(2)若\(f(x)\)有极小值,且极小值小于\(0\),求\(a\)的取值范围。"
|
||||||
|
},
|
||||||
|
]
|
||||||
|
outputs = llm.chat(prompts,
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
use_tqdm=False)
|
||||||
|
print(outputs)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 开源许可证
|
||||||
|
|
||||||
|
本仓库的代码和权重依照 Apache-2.0 协议开源。
|
||||||
|
|
||||||
|
## 引用
|
||||||
|
|
||||||
|
```
|
||||||
|
@misc{cai2024internlm2,
|
||||||
|
title={InternLM2 Technical Report},
|
||||||
|
author={Zheng Cai and Maosong Cao and Haojiong Chen and Kai Chen and Keyu Chen and Xin Chen and Xun Chen and Zehui Chen and Zhi Chen and Pei Chu and Xiaoyi Dong and Haodong Duan and Qi Fan and Zhaoye Fei and Yang Gao and Jiaye Ge and Chenya Gu and Yuzhe Gu and Tao Gui and Aijia Guo and Qipeng Guo and Conghui He and Yingfan Hu and Ting Huang and Tao Jiang and Penglong Jiao and Zhenjiang Jin and Zhikai Lei and Jiaxing Li and Jingwen Li and Linyang Li and Shuaibin Li and Wei Li and Yining Li and Hongwei Liu and Jiangning Liu and Jiawei Hong and Kaiwen Liu and Kuikun Liu and Xiaoran Liu and Chengqi Lv and Haijun Lv and Kai Lv and Li Ma and Runyuan Ma and Zerun Ma and Wenchang Ning and Linke Ouyang and Jiantao Qiu and Yuan Qu and Fukai Shang and Yunfan Shao and Demin Song and Zifan Song and Zhihao Sui and Peng Sun and Yu Sun and Huanze Tang and Bin Wang and Guoteng Wang and Jiaqi Wang and Jiayu Wang and Rui Wang and Yudong Wang and Ziyi Wang and Xingjian Wei and Qizhen Weng and Fan Wu and Yingtong Xiong and Chao Xu and Ruiliang Xu and Hang Yan and Yirong Yan and Xiaogui Yang and Haochen Ye and Huaiyuan Ying and Jia Yu and Jing Yu and Yuhang Zang and Chuyu Zhang and Li Zhang and Pan Zhang and Peng Zhang and Ruijie Zhang and Shuo Zhang and Songyang Zhang and Wenjian Zhang and Wenwei Zhang and Xingcheng Zhang and Xinyue Zhang and Hui Zhao and Qian Zhao and Xiaomeng Zhao and Fengzhe Zhou and Zaida Zhou and Jingming Zhuo and Yicheng Zou and Xipeng Qiu and Yu Qiao and Dahua Lin},
|
||||||
|
year={2024},
|
||||||
|
eprint={2403.17297},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.CL}
|
||||||
|
}
|
||||||
|
```
|
||||||
37
config.json
Normal file
37
config.json
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"InternLM3ForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"auto_map": {
|
||||||
|
"AutoConfig": "configuration_internlm3.InternLM3Config",
|
||||||
|
"AutoModel": "modeling_internlm3.InternLM3Model",
|
||||||
|
"AutoModelForCausalLM": "modeling_internlm3.InternLM3ForCausalLM"
|
||||||
|
},
|
||||||
|
"bias": false,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"head_dim": 128,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 4096,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 10240,
|
||||||
|
"max_position_embeddings": 32768,
|
||||||
|
"model_type": "internlm3",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 48,
|
||||||
|
"num_key_value_heads": 2,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"qkv_bias": false,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": {
|
||||||
|
"factor": 6.0,
|
||||||
|
"rope_type": "dynamic"
|
||||||
|
},
|
||||||
|
"rope_theta": 50000000,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"transformers_version": "4.47.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 128512
|
||||||
|
}
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||||
197
configuration_internlm3.py
Normal file
197
configuration_internlm3.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
# Copyright (c) The InternLM team and The HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# This code is based on transformers/src/transformers/models/llama/configuration_llama.py
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
""" InternLM3 model configuration"""
|
||||||
|
|
||||||
|
from transformers.configuration_utils import PretrainedConfig
|
||||||
|
from transformers.modeling_rope_utils import rope_config_validation
|
||||||
|
from transformers.utils import logging
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class InternLM3Config(PretrainedConfig):
|
||||||
|
r"""
|
||||||
|
This is the configuration class to store the configuration of a [`InternLM2Model`]. It is used to instantiate
|
||||||
|
an InternLM2 model according to the specified arguments, defining the model architecture. Instantiating a
|
||||||
|
configuration with the defaults will yield a similar configuration to that of the InternLM2-7B.
|
||||||
|
|
||||||
|
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
||||||
|
documentation from [`PretrainedConfig`] for more information.
|
||||||
|
|
||||||
|
|
||||||
|
Args:
|
||||||
|
vocab_size (`int`, *optional*, defaults to 151936):
|
||||||
|
Vocabulary size of the InternLM3 model. Defines the number of different tokens that can be represented by the
|
||||||
|
`inputs_ids` passed when calling [`InternLM3Model`]
|
||||||
|
hidden_size (`int`, *optional*, defaults to 4096):
|
||||||
|
Dimension of the hidden representations.
|
||||||
|
intermediate_size (`int`, *optional*, defaults to 22016):
|
||||||
|
Dimension of the MLP representations.
|
||||||
|
num_hidden_layers (`int`, *optional*, defaults to 32):
|
||||||
|
Number of hidden layers in the Transformer encoder.
|
||||||
|
num_attention_heads (`int`, *optional*, defaults to 32):
|
||||||
|
Number of attention heads for each attention layer in the Transformer encoder.
|
||||||
|
num_key_value_heads (`int`, *optional*, defaults to 32):
|
||||||
|
This is the number of key_value heads that should be used to implement Grouped Query Attention. If
|
||||||
|
`num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
|
||||||
|
`num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
|
||||||
|
converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
|
||||||
|
by meanpooling all the original heads within that group. For more details checkout [this
|
||||||
|
paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `32`.
|
||||||
|
hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
|
||||||
|
The non-linear activation function (function or string) in the decoder.
|
||||||
|
max_position_embeddings (`int`, *optional*, defaults to 32768):
|
||||||
|
The maximum sequence length that this model might ever be used with.
|
||||||
|
initializer_range (`float`, *optional*, defaults to 0.02):
|
||||||
|
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
||||||
|
rms_norm_eps (`float`, *optional*, defaults to 1e-06):
|
||||||
|
The epsilon used by the rms normalization layers.
|
||||||
|
use_cache (`bool`, *optional*, defaults to `True`):
|
||||||
|
Whether or not the model should return the last key/values attentions (not used by all models). Only
|
||||||
|
relevant if `config.is_decoder=True`.
|
||||||
|
tie_word_embeddings (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether the model's input and output word embeddings should be tied.
|
||||||
|
rope_theta (`float`, *optional*, defaults to 10000.0):
|
||||||
|
The base period of the RoPE embeddings.
|
||||||
|
rope_scaling (`Dict`, *optional*):
|
||||||
|
Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
|
||||||
|
and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
|
||||||
|
accordingly.
|
||||||
|
Expected contents:
|
||||||
|
`rope_type` (`str`):
|
||||||
|
The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope',
|
||||||
|
'llama3'], with 'default' being the original RoPE implementation.
|
||||||
|
`factor` (`float`, *optional*):
|
||||||
|
Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In
|
||||||
|
most scaling types, a `factor` of x will enable the model to handle sequences of length x *
|
||||||
|
original maximum pre-trained length.
|
||||||
|
`original_max_position_embeddings` (`int`, *optional*):
|
||||||
|
Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during
|
||||||
|
pretraining.
|
||||||
|
`attention_factor` (`float`, *optional*):
|
||||||
|
Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention
|
||||||
|
computation. If unspecified, it defaults to value recommended by the implementation, using the
|
||||||
|
`factor` field to infer the suggested value.
|
||||||
|
`beta_fast` (`float`, *optional*):
|
||||||
|
Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear
|
||||||
|
ramp function. If unspecified, it defaults to 32.
|
||||||
|
`beta_slow` (`float`, *optional*):
|
||||||
|
Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
|
||||||
|
ramp function. If unspecified, it defaults to 1.
|
||||||
|
`short_factor` (`List[float]`, *optional*):
|
||||||
|
Only used with 'longrope'. The scaling factor to be applied to short contexts (<
|
||||||
|
`original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
|
||||||
|
size divided by the number of attention heads divided by 2
|
||||||
|
`long_factor` (`List[float]`, *optional*):
|
||||||
|
Only used with 'longrope'. The scaling factor to be applied to long contexts (<
|
||||||
|
`original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
|
||||||
|
size divided by the number of attention heads divided by 2
|
||||||
|
`low_freq_factor` (`float`, *optional*):
|
||||||
|
Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE
|
||||||
|
`high_freq_factor` (`float`, *optional*):
|
||||||
|
Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE
|
||||||
|
qkv_bias (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether to use a bias in the query, key and value projection layers during self-attention.
|
||||||
|
attention_dropout (`float`, *optional*, defaults to 0.0):
|
||||||
|
The dropout ratio for the attention probabilities.
|
||||||
|
bias (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether to use a bias in o_proj, up_proj, down_proj and gate_proj layers.
|
||||||
|
head_dim (`int`, *optional*):
|
||||||
|
The attention head dimension. If None, it will default to hidden_size // num_heads
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> from transformers import InternLM3Model, InternLM3Config
|
||||||
|
|
||||||
|
>>> # Initializing a InternLM3 style configuration
|
||||||
|
>>> configuration = InternLM3Config()
|
||||||
|
|
||||||
|
>>> # Initializing a model from the InternLM3-8B style configuration
|
||||||
|
>>> model = InternLM3Model(configuration)
|
||||||
|
|
||||||
|
>>> # Accessing the model configuration
|
||||||
|
>>> configuration = model.config
|
||||||
|
```"""
|
||||||
|
|
||||||
|
model_type = "internlm3"
|
||||||
|
keys_to_ignore_at_inference = ["past_key_values"]
|
||||||
|
|
||||||
|
# Default tensor parallel plan for base model `InternLM3`
|
||||||
|
base_model_tp_plan = {
|
||||||
|
"layers.*.self_attn.q_proj": "colwise",
|
||||||
|
"layers.*.self_attn.k_proj": "colwise",
|
||||||
|
"layers.*.self_attn.v_proj": "colwise",
|
||||||
|
"layers.*.self_attn.o_proj": "rowwise",
|
||||||
|
"layers.*.mlp.gate_proj": "colwise",
|
||||||
|
"layers.*.mlp.up_proj": "colwise",
|
||||||
|
"layers.*.mlp.down_proj": "rowwise",
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vocab_size=128512,
|
||||||
|
hidden_size=4096,
|
||||||
|
intermediate_size=11008,
|
||||||
|
num_hidden_layers=32,
|
||||||
|
num_attention_heads=32,
|
||||||
|
num_key_value_heads=32,
|
||||||
|
hidden_act="silu",
|
||||||
|
max_position_embeddings=32768,
|
||||||
|
initializer_range=0.02,
|
||||||
|
rms_norm_eps=1e-6,
|
||||||
|
use_cache=True,
|
||||||
|
tie_word_embeddings=False,
|
||||||
|
rope_theta=10000.0,
|
||||||
|
rope_scaling=None,
|
||||||
|
qkv_bias=False,
|
||||||
|
attention_dropout=0.0,
|
||||||
|
bias=False,
|
||||||
|
head_dim=None,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
self.max_position_embeddings = max_position_embeddings
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.intermediate_size = intermediate_size
|
||||||
|
self.num_hidden_layers = num_hidden_layers
|
||||||
|
self.num_attention_heads = num_attention_heads
|
||||||
|
|
||||||
|
# for backward compatibility
|
||||||
|
if num_key_value_heads is None:
|
||||||
|
num_key_value_heads = num_attention_heads
|
||||||
|
|
||||||
|
self.num_key_value_heads = num_key_value_heads
|
||||||
|
self.hidden_act = hidden_act
|
||||||
|
self.initializer_range = initializer_range
|
||||||
|
self.rms_norm_eps = rms_norm_eps
|
||||||
|
self.use_cache = use_cache
|
||||||
|
self.rope_theta = rope_theta
|
||||||
|
self.rope_scaling = rope_scaling
|
||||||
|
self.qkv_bias = qkv_bias
|
||||||
|
self.attention_dropout = attention_dropout
|
||||||
|
self.bias = bias
|
||||||
|
self.head_dim = head_dim if head_dim is not None else self.hidden_size // self.num_attention_heads
|
||||||
|
# Validate the correctness of rotary position embeddings parameters
|
||||||
|
# BC: if there is a 'type' field, move it to 'rope_type'.
|
||||||
|
if self.rope_scaling is not None and "type" in self.rope_scaling:
|
||||||
|
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
|
||||||
|
rope_config_validation(self)
|
||||||
|
|
||||||
|
super().__init__(
|
||||||
|
tie_word_embeddings=tie_word_embeddings,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2,
|
||||||
|
128131
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.47.1"
|
||||||
|
}
|
||||||
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9a18eb700cd28f03c0745ac75ab53ead514cfa703fabb82c61a37b85cb593ff4
|
||||||
|
size 9928388896
|
||||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e324110df616ca5190f20469528f13421476f38f0e2597dac4d9ee0bdcede797
|
||||||
|
size 7680144544
|
||||||
442
model.safetensors.index.json
Normal file
442
model.safetensors.index.json
Normal file
@@ -0,0 +1,442 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 17608482816
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.36.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.37.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.38.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.39.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.40.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.40.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.41.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.42.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.43.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.44.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.45.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.46.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.47.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||||
|
"model.norm.weight": "model-00002-of-00002.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
1191
modeling_internlm3.py
Normal file
1191
modeling_internlm3.py
Normal file
File diff suppressed because it is too large
Load Diff
54
special_tokens_map.json
Normal file
54
special_tokens_map.json
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<|action_start|>",
|
||||||
|
"<|action_end|>",
|
||||||
|
"<|interpreter|>",
|
||||||
|
"<|plugin|>",
|
||||||
|
"<restate>",
|
||||||
|
"</restate>",
|
||||||
|
"<planning>",
|
||||||
|
"</planning>",
|
||||||
|
"<recollect>",
|
||||||
|
"</recollect>",
|
||||||
|
"<execution>",
|
||||||
|
"</execution>",
|
||||||
|
"<review>",
|
||||||
|
"</review>",
|
||||||
|
"<summarize>",
|
||||||
|
"</summarize>",
|
||||||
|
"<retry>",
|
||||||
|
"</retry>",
|
||||||
|
"<conclude>",
|
||||||
|
"</conclude>"
|
||||||
|
],
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
294
tokenization_internlm3.py
Normal file
294
tokenization_internlm3.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
import os
|
||||||
|
from shutil import copyfile
|
||||||
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import sentencepiece as spm
|
||||||
|
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
|
||||||
|
from transformers.utils import logging
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from transformers.tokenization_utils_base import TextInput
|
||||||
|
|
||||||
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
|
||||||
|
|
||||||
|
SPIECE_UNDERLINE = "▁"
|
||||||
|
|
||||||
|
|
||||||
|
class InternLM3Tokenizer(PreTrainedTokenizer):
|
||||||
|
"""
|
||||||
|
Construct a InternLM3 tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
|
||||||
|
no padding token in the original model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
vocab_file (`str`):
|
||||||
|
Path to the vocabulary file.
|
||||||
|
unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<unk>"`):
|
||||||
|
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
||||||
|
token instead.
|
||||||
|
bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<s>"`):
|
||||||
|
The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
|
||||||
|
eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"</s>"`):
|
||||||
|
The end of sequence token.
|
||||||
|
pad_token (`str` or `tokenizers.AddedToken`, *optional*):
|
||||||
|
A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
|
||||||
|
attention mechanisms or loss computation.
|
||||||
|
sp_model_kwargs (`Dict[str, Any]`, `Optional`, *optional*):
|
||||||
|
Will be passed to the `SentencePieceProcessor.__init__()` method. The [Python wrapper for
|
||||||
|
SentencePiece](https://github.com/google/sentencepiece/tree/master/python) can be used, among other things,
|
||||||
|
to set:
|
||||||
|
|
||||||
|
- `enable_sampling`: Enable subword regularization.
|
||||||
|
- `nbest_size`: Sampling parameters for unigram. Invalid for BPE-Dropout.
|
||||||
|
|
||||||
|
- `nbest_size = {0,1}`: No sampling is performed.
|
||||||
|
- `nbest_size > 1`: samples from the nbest_size results.
|
||||||
|
- `nbest_size < 0`: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
|
||||||
|
using forward-filtering-and-backward-sampling algorithm.
|
||||||
|
|
||||||
|
- `alpha`: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
|
||||||
|
BPE-dropout.
|
||||||
|
|
||||||
|
add_bos_token (`bool`, *optional*, defaults to `True`):
|
||||||
|
Whether or not to add an `bos_token` at the start of sequences.
|
||||||
|
add_eos_token (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not to add an `eos_token` at the end of sequences.
|
||||||
|
clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
|
||||||
|
extra spaces.
|
||||||
|
use_default_system_prompt (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not the default system prompt for InternLM3 should be used.
|
||||||
|
spaces_between_special_tokens (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not to add spaces between special tokens.
|
||||||
|
spaces_for_interleaved_special_tokens (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not to add spaces between special tokens that are interleaved with normal tokens.
|
||||||
|
add_prefix_space (`bool`, *optional*, defaults to `True`):
|
||||||
|
Whether or not to add an initial space to the input. This allows to treat the leading word just as any
|
||||||
|
other word. Again, this should be set with `from_slow=True` to make sure it's taken into account.
|
||||||
|
"""
|
||||||
|
_auto_class = "AutoTokenizer"
|
||||||
|
vocab_files_names = VOCAB_FILES_NAMES
|
||||||
|
model_input_names = ["input_ids", "attention_mask"]
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vocab_file,
|
||||||
|
unk_token="<unk>",
|
||||||
|
bos_token="<s>",
|
||||||
|
eos_token="</s>",
|
||||||
|
pad_token=None,
|
||||||
|
sp_model_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
|
add_bos_token=True,
|
||||||
|
add_eos_token=False,
|
||||||
|
clean_up_tokenization_spaces=False,
|
||||||
|
use_default_system_prompt=False,
|
||||||
|
spaces_between_special_tokens=False,
|
||||||
|
spaces_for_interleaved_special_tokens=False,
|
||||||
|
add_prefix_space=True,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
||||||
|
bos_token = AddedToken(bos_token, normalized=False, special=True) if isinstance(bos_token, str) else bos_token
|
||||||
|
eos_token = AddedToken(eos_token, normalized=False, special=True) if isinstance(eos_token, str) else eos_token
|
||||||
|
unk_token = AddedToken(unk_token, normalized=False, special=True) if isinstance(unk_token, str) else unk_token
|
||||||
|
pad_token = AddedToken(pad_token, normalized=False, special=True) if isinstance(pad_token, str) else pad_token
|
||||||
|
|
||||||
|
self.vocab_file = vocab_file
|
||||||
|
self.add_bos_token = add_bos_token
|
||||||
|
self.add_eos_token = add_eos_token
|
||||||
|
self.use_default_system_prompt = use_default_system_prompt
|
||||||
|
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
||||||
|
self.sp_model.Load(vocab_file)
|
||||||
|
self.add_prefix_space = add_prefix_space
|
||||||
|
self.spaces_for_interleaved_special_tokens = spaces_for_interleaved_special_tokens
|
||||||
|
|
||||||
|
vocab_size = self.sp_model.get_piece_size()
|
||||||
|
self.decoder = {i: self.sp_model.id_to_piece(i) for i in range(vocab_size)}
|
||||||
|
|
||||||
|
super().__init__(
|
||||||
|
bos_token=bos_token,
|
||||||
|
eos_token=eos_token,
|
||||||
|
unk_token=unk_token,
|
||||||
|
pad_token=pad_token,
|
||||||
|
add_bos_token=add_bos_token,
|
||||||
|
add_eos_token=add_eos_token,
|
||||||
|
sp_model_kwargs=sp_model_kwargs,
|
||||||
|
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
|
||||||
|
use_default_system_prompt=use_default_system_prompt,
|
||||||
|
spaces_between_special_tokens=spaces_between_special_tokens,
|
||||||
|
add_prefix_space=add_prefix_space,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
state = self.__dict__.copy()
|
||||||
|
state["sp_model"] = None
|
||||||
|
state["sp_model_proto"] = self.sp_model.serialized_model_proto()
|
||||||
|
return state
|
||||||
|
|
||||||
|
def __setstate__(self, d):
|
||||||
|
self.__dict__.update(d)
|
||||||
|
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
||||||
|
self.sp_model.LoadFromSerializedProto(self.sp_model_proto)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def vocab_size(self):
|
||||||
|
"""Returns vocab size"""
|
||||||
|
return self.sp_model.get_piece_size()
|
||||||
|
|
||||||
|
def get_vocab(self):
|
||||||
|
"""Returns vocab as a dict"""
|
||||||
|
vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
|
||||||
|
vocab.update(self.added_tokens_encoder)
|
||||||
|
return vocab
|
||||||
|
|
||||||
|
def tokenize(self, text: "TextInput", **kwargs) -> List[str]:
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
text: TextInput
|
||||||
|
Simply calls PreTrainedTokenizer's method
|
||||||
|
"""
|
||||||
|
return super().tokenize(text, **kwargs)
|
||||||
|
|
||||||
|
def _tokenize(self, text, **kwargs):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
text: TextInput
|
||||||
|
Returns a tokenized string. The Gemma tokenizer never adds a prefix space.
|
||||||
|
"""
|
||||||
|
return self.sp_model.encode(text, out_type=str)
|
||||||
|
|
||||||
|
def _convert_token_to_id(self, token):
|
||||||
|
"""Converts a token (str) in an id using the vocab."""
|
||||||
|
return self.sp_model.piece_to_id(token)
|
||||||
|
|
||||||
|
def _convert_id_to_token(self, index):
|
||||||
|
"""Converts an index (integer) in a token (str) using the vocab."""
|
||||||
|
return self.decoder.get(index, "")
|
||||||
|
|
||||||
|
def convert_tokens_to_string(self, tokens):
|
||||||
|
"""Converts a sequence of tokens (string) in a single string."""
|
||||||
|
# since we manually add the prefix space, we have to remove it when decoding
|
||||||
|
if tokens[0].startswith(SPIECE_UNDERLINE) and self.add_prefix_space:
|
||||||
|
tokens[0] = tokens[0][1:]
|
||||||
|
|
||||||
|
current_sub_tokens = []
|
||||||
|
out_string = ""
|
||||||
|
prev_is_special = False
|
||||||
|
for i, token in enumerate(tokens):
|
||||||
|
# make sure that special tokens are not decoded using sentencepiece model
|
||||||
|
if token in self.all_special_tokens:
|
||||||
|
if not prev_is_special and i != 0 and self.spaces_for_interleaved_special_tokens:
|
||||||
|
out_string += " "
|
||||||
|
out_string += self.sp_model.decode(current_sub_tokens) + token
|
||||||
|
prev_is_special = True
|
||||||
|
current_sub_tokens = []
|
||||||
|
else:
|
||||||
|
if (
|
||||||
|
prev_is_special
|
||||||
|
and i == 1
|
||||||
|
and self.add_prefix_space
|
||||||
|
and not token.startswith(SPIECE_UNDERLINE)
|
||||||
|
and self.spaces_for_interleaved_special_tokens
|
||||||
|
):
|
||||||
|
out_string += " "
|
||||||
|
current_sub_tokens.append(token)
|
||||||
|
prev_is_special = False
|
||||||
|
out_string += self.sp_model.decode(current_sub_tokens)
|
||||||
|
return out_string
|
||||||
|
|
||||||
|
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
||||||
|
"""
|
||||||
|
Save the vocabulary and special tokens file to a directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
save_directory (`str`):
|
||||||
|
The directory in which to save the vocabulary.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`Tuple(str)`: Paths to the files saved.
|
||||||
|
"""
|
||||||
|
if not os.path.isdir(save_directory):
|
||||||
|
logger.error(f"Vocabulary path ({save_directory}) should be a directory")
|
||||||
|
return
|
||||||
|
out_vocab_file = os.path.join(save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"])
|
||||||
|
|
||||||
|
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
|
||||||
|
copyfile(self.vocab_file, out_vocab_file)
|
||||||
|
elif not os.path.isfile(self.vocab_file):
|
||||||
|
with open(out_vocab_file, "wb") as fi:
|
||||||
|
content_spiece_model = self.sp_model.serialized_model_proto()
|
||||||
|
fi.write(content_spiece_model)
|
||||||
|
|
||||||
|
return (out_vocab_file,)
|
||||||
|
|
||||||
|
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
||||||
|
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
||||||
|
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
||||||
|
|
||||||
|
output = bos_token_id + token_ids_0 + eos_token_id
|
||||||
|
|
||||||
|
if token_ids_1 is not None:
|
||||||
|
output = output + bos_token_id + token_ids_1 + eos_token_id
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
def get_special_tokens_mask(
|
||||||
|
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
|
||||||
|
) -> List[int]:
|
||||||
|
"""
|
||||||
|
Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
|
||||||
|
special tokens using the tokenizer `prepare_for_model` method.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
token_ids_0 (`List[int]`):
|
||||||
|
List of IDs.
|
||||||
|
token_ids_1 (`List[int]`, *optional*):
|
||||||
|
Optional second list of IDs for sequence pairs.
|
||||||
|
already_has_special_tokens (`bool`, *optional*, defaults to `False`):
|
||||||
|
Whether or not the token list is already formatted with special tokens for the model.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
|
||||||
|
"""
|
||||||
|
if already_has_special_tokens:
|
||||||
|
return super().get_special_tokens_mask(token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True)
|
||||||
|
|
||||||
|
bos_token_id = [1] if self.add_bos_token else []
|
||||||
|
eos_token_id = [1] if self.add_eos_token else []
|
||||||
|
|
||||||
|
if token_ids_1 is None:
|
||||||
|
return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
|
||||||
|
return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id + bos_token_id + ([0] * len(token_ids_1)) + eos_token_id
|
||||||
|
|
||||||
|
def create_token_type_ids_from_sequences(self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None) -> List[int]:
|
||||||
|
"""
|
||||||
|
Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
|
||||||
|
sequence pair mask has the following format:
|
||||||
|
|
||||||
|
```
|
||||||
|
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
|
||||||
|
| first sequence | second sequence |
|
||||||
|
```
|
||||||
|
|
||||||
|
if token_ids_1 is None, only returns the first portion of the mask (0s).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
token_ids_0 (`List[int]`):
|
||||||
|
List of ids.
|
||||||
|
token_ids_1 (`List[int]`, *optional*):
|
||||||
|
Optional second list of IDs for sequence pairs.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
|
||||||
|
"""
|
||||||
|
bos_token_id = [self.bos_token_id] if self.add_bos_token else []
|
||||||
|
eos_token_id = [self.eos_token_id] if self.add_eos_token else []
|
||||||
|
|
||||||
|
output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
|
||||||
|
|
||||||
|
if token_ids_1 is not None:
|
||||||
|
output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
|
||||||
|
|
||||||
|
return output
|
||||||
3
tokenizer.model
Normal file
3
tokenizer.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:bcacff3229854f5103ee7a85473a30ca9a8b3a68f3aae9b7479574b23ac2256b
|
||||||
|
size 2475075
|
||||||
249
tokenizer_config.json
Normal file
249
tokenizer_config.json
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"add_prefix_space": true,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128111": {
|
||||||
|
"content": "<restate>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128112": {
|
||||||
|
"content": "</restate>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128113": {
|
||||||
|
"content": "<planning>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128114": {
|
||||||
|
"content": "</planning>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128115": {
|
||||||
|
"content": "<recollect>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128116": {
|
||||||
|
"content": "</recollect>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128117": {
|
||||||
|
"content": "<execution>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128118": {
|
||||||
|
"content": "</execution>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128119": {
|
||||||
|
"content": "<review>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128120": {
|
||||||
|
"content": "</review>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128121": {
|
||||||
|
"content": "<summarize>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128122": {
|
||||||
|
"content": "</summarize>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128123": {
|
||||||
|
"content": "<retry>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128124": {
|
||||||
|
"content": "</retry>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128125": {
|
||||||
|
"content": "<conclude>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128126": {
|
||||||
|
"content": "</conclude>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128127": {
|
||||||
|
"content": "<|plugin|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128128": {
|
||||||
|
"content": "<|interpreter|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128129": {
|
||||||
|
"content": "<|action_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128130": {
|
||||||
|
"content": "<|action_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128131": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"128132": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<|action_start|>",
|
||||||
|
"<|action_end|>",
|
||||||
|
"<|interpreter|>",
|
||||||
|
"<|plugin|>",
|
||||||
|
"<restate>",
|
||||||
|
"</restate>",
|
||||||
|
"<planning>",
|
||||||
|
"</planning>",
|
||||||
|
"<recollect>",
|
||||||
|
"</recollect>",
|
||||||
|
"<execution>",
|
||||||
|
"</execution>",
|
||||||
|
"<review>",
|
||||||
|
"</review>",
|
||||||
|
"<summarize>",
|
||||||
|
"</summarize>",
|
||||||
|
"<retry>",
|
||||||
|
"</retry>",
|
||||||
|
"<conclude>",
|
||||||
|
"</conclude>"
|
||||||
|
],
|
||||||
|
"auto_map": {
|
||||||
|
"AutoTokenizer": [
|
||||||
|
"tokenization_internlm3.InternLM3Tokenizer",
|
||||||
|
null
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "InternLM3Tokenizer",
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": false
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user