初始化项目,由ModelHub XC社区提供模型
Model: Henrychur/MMedS-Llama-3-8B Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
37
README.md
Normal file
37
README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
---
|
||||
license: llama3
|
||||
datasets:
|
||||
- Henrychur/MMedC
|
||||
- Henrychur/MedS-Ins
|
||||
language:
|
||||
- en
|
||||
base_model: Henrychur/MMedS-Llama-3-8B
|
||||
tags:
|
||||
- medical
|
||||
library_name: transformers
|
||||
---
|
||||
|
||||
|
||||
# MMedS-Llama3
|
||||
[💻Github Repo](https://github.com/MAGIC-AI4Med/MedS-Ins) [🖨️arXiv Paper](https://arxiv.org/abs/2408.12547)
|
||||
|
||||
The official code for "Towards Evaluating and Building Versatile Large Language Models for Medicine".
|
||||
|
||||
|
||||
## Introduction
|
||||
This repository hosts MMedS-Llama-3-8B. Its foundation model, [MMed-Llama-3-8B](https://huggingface.co/Henrychur/MMed-Llama-3-8B),
|
||||
is a multilingual medical language model which has undergone additional continuous pretraining on MMedC. Furthermore, the model has
|
||||
been fine-tuned under supervision using MedS-Ins, a comprehensive dataset designed specifically for supervised fine-tuning (SFT),
|
||||
featuring 13.5 million samples across 122 tasks. For more details, please refer to our paper.
|
||||
|
||||
|
||||
## Usage
|
||||
The model can be loaded as follows:
|
||||
```py
|
||||
import torch
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
tokenizer = AutoTokenizer.from_pretrained("Henrychur/MMedS-Llama-3-8B")
|
||||
model = AutoModelForCausalLM.from_pretrained("Henrychur/MMedS-Llama-3-8B", torch_dtype=torch.float16)
|
||||
```
|
||||
|
||||
- The inference format is the same as Llama 3; you can check the inference code [here](https://github.com/MAGIC-AI4Med/MedS-Ins/blob/main/Inference/model.py).
|
||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"_name_or_path": "/mnt/hwfile/medai/qiupengcheng/SFT_model_weights/SFT_v4_2Epoch",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128001,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 8192,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.34.0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
6
generation_config.json
Normal file
6
generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128001,
|
||||
"transformers_version": "4.34.0"
|
||||
}
|
||||
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bdd29b60fb05250ea3e48a23f011ce5690e610b989bdd511c929b3a0af88a7b7
|
||||
size 9953387744
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1bb8d44bac00a51f4678c66ead952ff1ad446811ce2c22034d249d74c37d5645
|
||||
size 9999593224
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:03cb3574899e93d620966d99a6f556e534759ccb420efac82e33df8583655a84
|
||||
size 9831820608
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c0a8e4cf46140643c4e811b7ddedf8afb8bd62e1b9f2ada8c6f8addd69e38c58
|
||||
size 2336277048
|
||||
298
model.safetensors.index.json
Normal file
298
model.safetensors.index.json
Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 32121044992
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
3
pytorch_model-00001-of-00004.bin
Normal file
3
pytorch_model-00001-of-00004.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f2696123af3f780ddae823733e813e96ddfef71c155901ce179cc428e37e9f85
|
||||
size 9953406946
|
||||
3
pytorch_model-00002-of-00004.bin
Normal file
3
pytorch_model-00002-of-00004.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d67c698bbd06015a9c383d94938e581ae0e7dee865b0dc2df7918dc040c5daba
|
||||
size 9999617342
|
||||
3
pytorch_model-00003-of-00004.bin
Normal file
3
pytorch_model-00003-of-00004.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fb27da98c325761293f19521fca4089ac97486c22d2552f165388ce4934aa23
|
||||
size 9831843814
|
||||
3
pytorch_model-00004-of-00004.bin
Normal file
3
pytorch_model-00004-of-00004.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:744d4fd0256f438e3430094f540e030c59c4ec1a0cc411c953dada32a60d2987
|
||||
size 2336279113
|
||||
298
pytorch_model.bin.index.json
Normal file
298
pytorch_model.bin.index.json
Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 32121044992
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "pytorch_model-00004-of-00004.bin",
|
||||
"model.embed_tokens.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.20.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.21.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
|
||||
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
|
||||
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
|
||||
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
|
||||
"model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
|
||||
"model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
|
||||
"model.norm.weight": "pytorch_model-00004-of-00004.bin"
|
||||
}
|
||||
}
|
||||
262
special_tokens_map.json
Normal file
262
special_tokens_map.json
Normal file
@@ -0,0 +1,262 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|begin_of_text|>",
|
||||
"<|end_of_text|>",
|
||||
"<|reserved_special_token_0|>",
|
||||
"<|reserved_special_token_1|>",
|
||||
"<|reserved_special_token_2|>",
|
||||
"<|reserved_special_token_3|>",
|
||||
"<|start_header_id|>",
|
||||
"<|end_header_id|>",
|
||||
"<|reserved_special_token_4|>",
|
||||
"<|eot_id|>",
|
||||
"<|reserved_special_token_5|>",
|
||||
"<|reserved_special_token_6|>",
|
||||
"<|reserved_special_token_7|>",
|
||||
"<|reserved_special_token_8|>",
|
||||
"<|reserved_special_token_9|>",
|
||||
"<|reserved_special_token_10|>",
|
||||
"<|reserved_special_token_11|>",
|
||||
"<|reserved_special_token_12|>",
|
||||
"<|reserved_special_token_13|>",
|
||||
"<|reserved_special_token_14|>",
|
||||
"<|reserved_special_token_15|>",
|
||||
"<|reserved_special_token_16|>",
|
||||
"<|reserved_special_token_17|>",
|
||||
"<|reserved_special_token_18|>",
|
||||
"<|reserved_special_token_19|>",
|
||||
"<|reserved_special_token_20|>",
|
||||
"<|reserved_special_token_21|>",
|
||||
"<|reserved_special_token_22|>",
|
||||
"<|reserved_special_token_23|>",
|
||||
"<|reserved_special_token_24|>",
|
||||
"<|reserved_special_token_25|>",
|
||||
"<|reserved_special_token_26|>",
|
||||
"<|reserved_special_token_27|>",
|
||||
"<|reserved_special_token_28|>",
|
||||
"<|reserved_special_token_29|>",
|
||||
"<|reserved_special_token_30|>",
|
||||
"<|reserved_special_token_31|>",
|
||||
"<|reserved_special_token_32|>",
|
||||
"<|reserved_special_token_33|>",
|
||||
"<|reserved_special_token_34|>",
|
||||
"<|reserved_special_token_35|>",
|
||||
"<|reserved_special_token_36|>",
|
||||
"<|reserved_special_token_37|>",
|
||||
"<|reserved_special_token_38|>",
|
||||
"<|reserved_special_token_39|>",
|
||||
"<|reserved_special_token_40|>",
|
||||
"<|reserved_special_token_41|>",
|
||||
"<|reserved_special_token_42|>",
|
||||
"<|reserved_special_token_43|>",
|
||||
"<|reserved_special_token_44|>",
|
||||
"<|reserved_special_token_45|>",
|
||||
"<|reserved_special_token_46|>",
|
||||
"<|reserved_special_token_47|>",
|
||||
"<|reserved_special_token_48|>",
|
||||
"<|reserved_special_token_49|>",
|
||||
"<|reserved_special_token_50|>",
|
||||
"<|reserved_special_token_51|>",
|
||||
"<|reserved_special_token_52|>",
|
||||
"<|reserved_special_token_53|>",
|
||||
"<|reserved_special_token_54|>",
|
||||
"<|reserved_special_token_55|>",
|
||||
"<|reserved_special_token_56|>",
|
||||
"<|reserved_special_token_57|>",
|
||||
"<|reserved_special_token_58|>",
|
||||
"<|reserved_special_token_59|>",
|
||||
"<|reserved_special_token_60|>",
|
||||
"<|reserved_special_token_61|>",
|
||||
"<|reserved_special_token_62|>",
|
||||
"<|reserved_special_token_63|>",
|
||||
"<|reserved_special_token_64|>",
|
||||
"<|reserved_special_token_65|>",
|
||||
"<|reserved_special_token_66|>",
|
||||
"<|reserved_special_token_67|>",
|
||||
"<|reserved_special_token_68|>",
|
||||
"<|reserved_special_token_69|>",
|
||||
"<|reserved_special_token_70|>",
|
||||
"<|reserved_special_token_71|>",
|
||||
"<|reserved_special_token_72|>",
|
||||
"<|reserved_special_token_73|>",
|
||||
"<|reserved_special_token_74|>",
|
||||
"<|reserved_special_token_75|>",
|
||||
"<|reserved_special_token_76|>",
|
||||
"<|reserved_special_token_77|>",
|
||||
"<|reserved_special_token_78|>",
|
||||
"<|reserved_special_token_79|>",
|
||||
"<|reserved_special_token_80|>",
|
||||
"<|reserved_special_token_81|>",
|
||||
"<|reserved_special_token_82|>",
|
||||
"<|reserved_special_token_83|>",
|
||||
"<|reserved_special_token_84|>",
|
||||
"<|reserved_special_token_85|>",
|
||||
"<|reserved_special_token_86|>",
|
||||
"<|reserved_special_token_87|>",
|
||||
"<|reserved_special_token_88|>",
|
||||
"<|reserved_special_token_89|>",
|
||||
"<|reserved_special_token_90|>",
|
||||
"<|reserved_special_token_91|>",
|
||||
"<|reserved_special_token_92|>",
|
||||
"<|reserved_special_token_93|>",
|
||||
"<|reserved_special_token_94|>",
|
||||
"<|reserved_special_token_95|>",
|
||||
"<|reserved_special_token_96|>",
|
||||
"<|reserved_special_token_97|>",
|
||||
"<|reserved_special_token_98|>",
|
||||
"<|reserved_special_token_99|>",
|
||||
"<|reserved_special_token_100|>",
|
||||
"<|reserved_special_token_101|>",
|
||||
"<|reserved_special_token_102|>",
|
||||
"<|reserved_special_token_103|>",
|
||||
"<|reserved_special_token_104|>",
|
||||
"<|reserved_special_token_105|>",
|
||||
"<|reserved_special_token_106|>",
|
||||
"<|reserved_special_token_107|>",
|
||||
"<|reserved_special_token_108|>",
|
||||
"<|reserved_special_token_109|>",
|
||||
"<|reserved_special_token_110|>",
|
||||
"<|reserved_special_token_111|>",
|
||||
"<|reserved_special_token_112|>",
|
||||
"<|reserved_special_token_113|>",
|
||||
"<|reserved_special_token_114|>",
|
||||
"<|reserved_special_token_115|>",
|
||||
"<|reserved_special_token_116|>",
|
||||
"<|reserved_special_token_117|>",
|
||||
"<|reserved_special_token_118|>",
|
||||
"<|reserved_special_token_119|>",
|
||||
"<|reserved_special_token_120|>",
|
||||
"<|reserved_special_token_121|>",
|
||||
"<|reserved_special_token_122|>",
|
||||
"<|reserved_special_token_123|>",
|
||||
"<|reserved_special_token_124|>",
|
||||
"<|reserved_special_token_125|>",
|
||||
"<|reserved_special_token_126|>",
|
||||
"<|reserved_special_token_127|>",
|
||||
"<|reserved_special_token_128|>",
|
||||
"<|reserved_special_token_129|>",
|
||||
"<|reserved_special_token_130|>",
|
||||
"<|reserved_special_token_131|>",
|
||||
"<|reserved_special_token_132|>",
|
||||
"<|reserved_special_token_133|>",
|
||||
"<|reserved_special_token_134|>",
|
||||
"<|reserved_special_token_135|>",
|
||||
"<|reserved_special_token_136|>",
|
||||
"<|reserved_special_token_137|>",
|
||||
"<|reserved_special_token_138|>",
|
||||
"<|reserved_special_token_139|>",
|
||||
"<|reserved_special_token_140|>",
|
||||
"<|reserved_special_token_141|>",
|
||||
"<|reserved_special_token_142|>",
|
||||
"<|reserved_special_token_143|>",
|
||||
"<|reserved_special_token_144|>",
|
||||
"<|reserved_special_token_145|>",
|
||||
"<|reserved_special_token_146|>",
|
||||
"<|reserved_special_token_147|>",
|
||||
"<|reserved_special_token_148|>",
|
||||
"<|reserved_special_token_149|>",
|
||||
"<|reserved_special_token_150|>",
|
||||
"<|reserved_special_token_151|>",
|
||||
"<|reserved_special_token_152|>",
|
||||
"<|reserved_special_token_153|>",
|
||||
"<|reserved_special_token_154|>",
|
||||
"<|reserved_special_token_155|>",
|
||||
"<|reserved_special_token_156|>",
|
||||
"<|reserved_special_token_157|>",
|
||||
"<|reserved_special_token_158|>",
|
||||
"<|reserved_special_token_159|>",
|
||||
"<|reserved_special_token_160|>",
|
||||
"<|reserved_special_token_161|>",
|
||||
"<|reserved_special_token_162|>",
|
||||
"<|reserved_special_token_163|>",
|
||||
"<|reserved_special_token_164|>",
|
||||
"<|reserved_special_token_165|>",
|
||||
"<|reserved_special_token_166|>",
|
||||
"<|reserved_special_token_167|>",
|
||||
"<|reserved_special_token_168|>",
|
||||
"<|reserved_special_token_169|>",
|
||||
"<|reserved_special_token_170|>",
|
||||
"<|reserved_special_token_171|>",
|
||||
"<|reserved_special_token_172|>",
|
||||
"<|reserved_special_token_173|>",
|
||||
"<|reserved_special_token_174|>",
|
||||
"<|reserved_special_token_175|>",
|
||||
"<|reserved_special_token_176|>",
|
||||
"<|reserved_special_token_177|>",
|
||||
"<|reserved_special_token_178|>",
|
||||
"<|reserved_special_token_179|>",
|
||||
"<|reserved_special_token_180|>",
|
||||
"<|reserved_special_token_181|>",
|
||||
"<|reserved_special_token_182|>",
|
||||
"<|reserved_special_token_183|>",
|
||||
"<|reserved_special_token_184|>",
|
||||
"<|reserved_special_token_185|>",
|
||||
"<|reserved_special_token_186|>",
|
||||
"<|reserved_special_token_187|>",
|
||||
"<|reserved_special_token_188|>",
|
||||
"<|reserved_special_token_189|>",
|
||||
"<|reserved_special_token_190|>",
|
||||
"<|reserved_special_token_191|>",
|
||||
"<|reserved_special_token_192|>",
|
||||
"<|reserved_special_token_193|>",
|
||||
"<|reserved_special_token_194|>",
|
||||
"<|reserved_special_token_195|>",
|
||||
"<|reserved_special_token_196|>",
|
||||
"<|reserved_special_token_197|>",
|
||||
"<|reserved_special_token_198|>",
|
||||
"<|reserved_special_token_199|>",
|
||||
"<|reserved_special_token_200|>",
|
||||
"<|reserved_special_token_201|>",
|
||||
"<|reserved_special_token_202|>",
|
||||
"<|reserved_special_token_203|>",
|
||||
"<|reserved_special_token_204|>",
|
||||
"<|reserved_special_token_205|>",
|
||||
"<|reserved_special_token_206|>",
|
||||
"<|reserved_special_token_207|>",
|
||||
"<|reserved_special_token_208|>",
|
||||
"<|reserved_special_token_209|>",
|
||||
"<|reserved_special_token_210|>",
|
||||
"<|reserved_special_token_211|>",
|
||||
"<|reserved_special_token_212|>",
|
||||
"<|reserved_special_token_213|>",
|
||||
"<|reserved_special_token_214|>",
|
||||
"<|reserved_special_token_215|>",
|
||||
"<|reserved_special_token_216|>",
|
||||
"<|reserved_special_token_217|>",
|
||||
"<|reserved_special_token_218|>",
|
||||
"<|reserved_special_token_219|>",
|
||||
"<|reserved_special_token_220|>",
|
||||
"<|reserved_special_token_221|>",
|
||||
"<|reserved_special_token_222|>",
|
||||
"<|reserved_special_token_223|>",
|
||||
"<|reserved_special_token_224|>",
|
||||
"<|reserved_special_token_225|>",
|
||||
"<|reserved_special_token_226|>",
|
||||
"<|reserved_special_token_227|>",
|
||||
"<|reserved_special_token_228|>",
|
||||
"<|reserved_special_token_229|>",
|
||||
"<|reserved_special_token_230|>",
|
||||
"<|reserved_special_token_231|>",
|
||||
"<|reserved_special_token_232|>",
|
||||
"<|reserved_special_token_233|>",
|
||||
"<|reserved_special_token_234|>",
|
||||
"<|reserved_special_token_235|>",
|
||||
"<|reserved_special_token_236|>",
|
||||
"<|reserved_special_token_237|>",
|
||||
"<|reserved_special_token_238|>",
|
||||
"<|reserved_special_token_239|>",
|
||||
"<|reserved_special_token_240|>",
|
||||
"<|reserved_special_token_241|>",
|
||||
"<|reserved_special_token_242|>",
|
||||
"<|reserved_special_token_243|>",
|
||||
"<|reserved_special_token_244|>",
|
||||
"<|reserved_special_token_245|>",
|
||||
"<|reserved_special_token_246|>",
|
||||
"<|reserved_special_token_247|>",
|
||||
"<|reserved_special_token_248|>",
|
||||
"<|reserved_special_token_249|>",
|
||||
"<|reserved_special_token_250|>"
|
||||
],
|
||||
"bos_token": "<|begin_of_text|>",
|
||||
"eos_token": "<|end_of_text|>"
|
||||
}
|
||||
410503
tokenizer.json
Normal file
410503
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
2321
tokenizer_config.json
Normal file
2321
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user