初始化项目,由ModelHub XC社区提供模型

Model: AI-ModelScope/openbuddy-falcon-7b-v15-fp16
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-01 19:56:13 +08:00
commit f0600c5f7f
14 changed files with 177664 additions and 0 deletions

34
.gitattributes vendored Normal file
View File

@@ -0,0 +1,34 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

53
README.md Normal file
View File

@@ -0,0 +1,53 @@
---
license: Apache License 2.0
tasks:
- text-generation
pipeline_tag: text2text-generation
language:
- zh
- en
- fr
- de
- ja
- ko
- it
- ru
domain:
- nlp
frameworks:
- pytorch
backbone:
- transformer
---
## 使用方法
```python
# pip install "transformers<4.45" -U
from modelscope.utils.constant import Tasks
from modelscope.pipelines import pipeline
pipe = pipeline(task=Tasks.text_generation, model='AI-ModelScope/openbuddy-falcon-7b-v15-fp16', model_revision='master')
system_prompt = '''Consider a conversation between User (a human) and Assistant (named Buddy).
Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team, based on Falcon and LLaMA Transformers architecture. GitHub: https://github.com/OpenBuddy/OpenBuddy
Buddy cannot access the Internet.
Buddy can fluently speak the user's language (e.g. English, Chinese).
Buddy can generate poems, stories, code, essays, songs, and more.
Buddy possesses knowledge about the world, history, and culture, but not everything. Knowledge cutoff: 2021-09.
Buddy's responses are always positive, unharmful, safe, creative, high-quality, human-like, and interesting.
Buddy must always be safe and unharmful to humans.
Buddy strictly refuses to discuss harmful, political, NSFW, illegal, abusive, offensive, or other sensitive topics.'''
prompt = system_prompt + "\n\nUser: Write a poem about yourself.\nAssistant:"
print(pipe(prompt))
```
## 免责声明
所有OpenBuddy模型均存在固有的局限性，可能产生错误的、有害的、冒犯性的或其他不良的输出。用户在关键或高风险场景中应谨慎行事，不要使用这些模型，以免导致人身伤害、财产损失或重大损失。此类场景的例子包括但不限于医疗领域、可能导致伤害的软硬件系统的控制以及进行重要的财务或法律决策。
OpenBuddy按“原样”提供，不附带任何种类的明示或暗示的保证，包括但不限于适销性、特定目的的适用性和非侵权的暗示保证。在任何情况下，作者、贡献者或版权所有者均不对因软件或使用或其他软件交易而产生的任何索赔、损害赔偿或其他责任（无论是合同、侵权还是其他原因）承担责任。
使用OpenBuddy即表示您同意这些条款和条件，并承认您了解其使用可能带来的潜在风险。您还同意赔偿并使作者、贡献者和版权所有者免受因您使用OpenBuddy而产生的任何索赔、损害赔偿或责任的影响。

33
config.json Normal file
View File

@@ -0,0 +1,33 @@
{
"_name_or_path": "openbuddy-falcon-7b-v1.5",
"alibi": false,
"apply_residual_connection_post_layernorm": false,
"architectures": [
"RWForCausalLM"
],
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_RW.RWConfig",
"AutoModel": "modelling_RW.RWModel",
"AutoModelForCausalLM": "modelling_RW.RWForCausalLM",
"AutoModelForQuestionAnswering": "modelling_RW.RWForQuestionAnswering",
"AutoModelForSequenceClassification": "modelling_RW.RWForSequenceClassification",
"AutoModelForTokenClassification": "modelling_RW.RWForTokenClassification"
},
"bias": false,
"bos_token_id": 11,
"eos_token_id": 11,
"hidden_dropout": 0.0,
"hidden_size": 4544,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"model_type": "RefinedWebModel",
"multi_query": true,
"n_head": 71,
"n_layer": 32,
"parallel_attn": true,
"torch_dtype": "float16",
"transformers_version": "4.29.2",
"use_cache": false,
"vocab_size": 70144
}

11
configuration.json Normal file
View File

@@ -0,0 +1,11 @@
{
"framework": "pytorch",
"task": "text-generation",
"model": {
"type": "openbuddy-falcon-7b-v1-5-fp16"
},
"pipeline": {
"type": "openbuddy-falcon-7b-v1-5-fp16-text-generation-pipe"
},
"allow_remote": true
}

79
configuration_RW.py Normal file
View File

@@ -0,0 +1,79 @@
# coding=utf-8
# Copyright 2022 the Big Science Workshop and HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""RefinedWebModel (RW) configuration."""
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class RWConfig(PretrainedConfig):
    """Configuration object for the ``RefinedWebModel`` (RW) architecture.

    Every constructor argument is stored on the instance under the same name.
    ``bos_token_id`` and ``eos_token_id`` are additionally forwarded to
    ``PretrainedConfig.__init__`` together with any remaining keyword
    arguments.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        hidden_size: Stored as ``self.hidden_size`` unless the legacy
            ``n_embed`` keyword is supplied, in which case that value wins.
        n_layer: Stored as ``self.n_layer`` (aliased as ``num_hidden_layers``).
        n_head: Stored as ``self.n_head`` (aliased as ``num_attention_heads``).
        layer_norm_epsilon: Stored as ``self.layer_norm_epsilon``.
        initializer_range: Stored as ``self.initializer_range``.
        use_cache: Stored as ``self.use_cache``.
        bos_token_id: Stored and passed on to the base class.
        eos_token_id: Stored and passed on to the base class.
        apply_residual_connection_post_layernorm: Stored unchanged.
        hidden_dropout: Stored as ``self.hidden_dropout``.
        attention_dropout: Stored as ``self.attention_dropout``.
        multi_query: Stored as ``self.multi_query``.
        alibi: Stored as ``self.alibi``; also drives the ``rotary`` property.
        bias: Stored as ``self.bias``.
        parallel_attn: Stored as ``self.parallel_attn``.
        **kwargs: ``n_embed`` is consumed here; everything else goes to the
            base class.
    """

    model_type = "RefinedWebModel"
    keys_to_ignore_at_inference = ["past_key_values"]
    # Alias table: generic attribute name -> field name used by this config.
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
    }

    def __init__(
        self,
        vocab_size=250880,
        hidden_size=64,
        n_layer=2,
        n_head=8,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        apply_residual_connection_post_layernorm=False,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        multi_query=False,
        alibi=False,
        bias=False,
        parallel_attn=False,
        **kwargs,
    ):
        # Backward compatibility: an explicit ``n_embed`` keyword overrides
        # ``hidden_size``. It is popped so the base class never sees it.
        legacy_width = kwargs.pop("n_embed", None)
        if legacy_width is None:
            self.hidden_size = hidden_size
        else:
            self.hidden_size = legacy_width

        # Model dimensions.
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head

        # Numerics, initialisation and caching.
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.use_cache = use_cache

        # Regularisation and residual wiring.
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout

        # Special token ids.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id

        # Architecture switches.
        self.multi_query = multi_query
        self.alibi = alibi
        self.bias = bias
        self.parallel_attn = parallel_attn

        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)

    @property
    def head_dim(self):
        """Return ``hidden_size`` floor-divided by ``n_head``."""
        return self.hidden_size // self.n_head

    @property
    def rotary(self):
        """Return the negation of ``alibi``."""
        return not self.alibi

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.29.2"
}

1100
modelling_RW.py Normal file

File diff suppressed because it is too large Load Diff

65
ms_wrapper.py Normal file
View File

@@ -0,0 +1,65 @@
import os
from typing import Any, Dict, Union
import torch
from modelscope.models.base import Model, TorchModel
from modelscope.models.builder import MODELS
from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
from transformers import AutoModelForCausalLM, AutoTokenizer
# Default to GPU 0, but keep any CUDA_VISIBLE_DEVICES value that is already set
# in the environment instead of overwriting it when this module is imported.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', "0")
@PIPELINES.register_module(Tasks.text_generation, module_name='openbuddy-falcon-7b-v1-5-fp16-text-generation-pipe')
class openbuddyfalcon7bv15fp16TextGenerationPipeline(Pipeline):
    """Text-generation pipeline that hands its input straight to the model.

    ``preprocess`` and ``postprocess`` are identity functions; the only work
    done by this class is calling ``self.model`` inside ``forward``.
    """

    def __init__(
            self,
            model: Union[Model, str],
            *args,
            **kwargs):
        """Build the pipeline around a model instance or a model directory.

        Args:
            model: Either a ready model object, or a string that is passed to
                ``openbuddyfalcon7bv15fp16TextGeneration`` to construct one.
            *args: Accepted but not forwarded to the base class.
            **kwargs: Forwarded to ``Pipeline.__init__``.
        """
        if isinstance(model, str):
            model = openbuddyfalcon7bv15fp16TextGeneration(model)
        super().__init__(model=model, **kwargs)

    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        """Return ``inputs`` untouched; ``preprocess_params`` are unused."""
        return inputs

    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        """Call the wrapped model on ``inputs``; ``forward_params`` are unused."""
        return self.model(inputs)

    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        """Return the model output untouched; ``kwargs`` are unused."""
        return input
@MODELS.register_module(Tasks.text_generation, module_name='openbuddy-falcon-7b-v1-5-fp16')
class openbuddyfalcon7bv15fp16TextGeneration(TorchModel):
    """ModelScope model wrapper around a causal LM loaded from ``model_dir``.

    The tokenizer and model are loaded once in ``__init__``. ``forward`` turns
    a text prompt into ``{'text': <decoded generation>}``.
    """

    def __init__(self, model_dir=None, *args, **kwargs):
        """Load the tokenizer and the model weights from ``model_dir``.

        Args:
            model_dir: Directory handed to both ``from_pretrained`` calls.
            *args: Forwarded to ``TorchModel.__init__``.
            **kwargs: Forwarded to ``TorchModel.__init__``.
        """
        super().__init__(model_dir, *args, **kwargs)
        self.logger = get_logger()
        # loading tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        # NOTE(review): weights are loaded as bfloat16 although the repository
        # name says fp16 -- confirm this dtype is intended.
        self.model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
        self.model = self.model.eval()

    def forward(self, input: str, *args, **kwargs) -> Dict[str, Any]:
        """Generate a continuation for ``input``.

        Args:
            input: Prompt text; it is passed to ``tokenizer.encode``.
            *args: Ignored.
            **kwargs: Optional generation settings, forwarded to ``infer``.

        Returns:
            A dict with the single key ``'text'`` holding the decoded output.
        """
        return {'text': self.infer(input, **kwargs)}

    def quantize(self, bits: int):
        """Replace ``self.model`` with ``self.model.quantize(bits)``; return self.

        NOTE(review): this relies on the loaded model exposing a ``quantize``
        method, which cannot be confirmed from this file.
        """
        self.model = self.model.quantize(bits)
        return self

    def infer(self, input, **generate_kwargs):
        """Tokenize ``input``, run ``generate`` and decode the first sequence.

        Args:
            input: Prompt text passed to ``tokenizer.encode``.
            **generate_kwargs: Extra keyword arguments for ``model.generate``.
                When neither ``max_length`` nor ``max_new_tokens`` is given,
                ``max_length=2048`` is used, as before. When ``eos_token_id``
                is not given, the tokenizer's EOS id is used.

        Returns:
            The decoded text of ``output_ids[0]`` with special tokens skipped.
        """
        device = self.model.device
        input_ids = self.tokenizer.encode(input, return_tensors='pt').to(device)

        # Keep the historical length limit unless the caller chose one.
        if 'max_length' not in generate_kwargs and 'max_new_tokens' not in generate_kwargs:
            generate_kwargs['max_length'] = 2048

        # NOTE(review): the generation_config.json shipped with this model
        # lists eos_token_id 2, while config.json uses 11. Stop on the
        # tokenizer's own EOS token so decoding ends at end-of-text.
        eos_token_id = self.tokenizer.eos_token_id
        if eos_token_id is not None:
            generate_kwargs.setdefault('eos_token_id', eos_token_id)

        output_ids = self.model.generate(input_ids, **generate_kwargs)
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2a9ec7042a9da5a7473098e42856790d085bfa928f247b246b63db5c32f0ce87
size 9997556833

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5d8a57278c936290b16cf3eca0c2b30957d2d2c5171e29c49ed8f2ebf521956
size 4529951399

View File

@@ -0,0 +1,203 @@
{
"metadata": {
"total_size": 14527440640
},
"weight_map": {
"lm_head.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.0.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.0.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.10.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.11.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.12.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.13.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.14.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.15.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.16.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.17.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.18.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.19.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.2.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.20.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.21.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.22.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.22.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.22.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.22.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.22.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.23.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.23.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.23.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.23.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.24.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.25.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.26.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.27.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.28.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.29.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.3.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.3.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.3.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.3.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.3.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.30.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.30.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.30.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.30.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.30.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.31.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
"transformer.h.4.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.4.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.4.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.4.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.4.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.5.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.6.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.7.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.8.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
"transformer.h.9.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
"transformer.ln_f.bias": "pytorch_model-00002-of-00002.bin",
"transformer.ln_f.weight": "pytorch_model-00002-of-00002.bin",
"transformer.word_embeddings.weight": "pytorch_model-00001-of-00002.bin"
}
}

16
special_tokens_map.json Normal file
View File

@@ -0,0 +1,16 @@
{
"additional_special_tokens": [
">>TITLE<<",
">>ABSTRACT<<",
">>INTRODUCTION<<",
">>SUMMARY<<",
">>COMMENT<<",
">>ANSWER<<",
">>QUESTION<<",
">>DOMAIN<<",
">>PREFIX<<",
">>SUFFIX<<",
">>MIDDLE<<"
],
"eos_token": "<|endoftext|>"
}

176051
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

7
tokenizer_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"add_prefix_space": false,
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"model_max_length": 2048,
"tokenizer_class": "PreTrainedTokenizerFast"
}