初始化项目，由ModelHub XC社区提供模型

Model: AI-ModelScope/openbuddy-falcon-7b-v15-fp16 Source: Original Platform
2026-06-01 19:56:13 +08:00
commit f0600c5f7f
14 changed files with 177664 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,34 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,53 @@
 ---
 license: Apache License 2.0
 tasks:
 - text-generation
 pipeline_tag: text2text-generation
 language:
 - zh
 - en
 - fr
 - de
 - ja
 - ko
 - it
 - ru
 domain:
 - nlp
 frameworks:
 - pytorch
 backbone:
 - transformer
 ---
 ## 使用方法
 ```python
 # pip install "transformers<4.45" -U
 from modelscope.utils.constant import Tasks
 from modelscope.pipelines import pipeline
 pipe = pipeline(task=Tasks.text_generation, model='AI-ModelScope/openbuddy-falcon-7b-v15-fp16', model_revision='master')
 system_prompt = '''Consider a conversation between User (a human) and Assistant (named Buddy).
 Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team, based on Falcon and LLaMA Transformers architecture. GitHub: https://github.com/OpenBuddy/OpenBuddy
 Buddy cannot access the Internet.
 Buddy can fluently speak the user's language (e.g. English, Chinese).
 Buddy can generate poems, stories, code, essays, songs, and more.
 Buddy possesses knowledge about the world, history, and culture, but not everything. Knowledge cutoff: 2021-09.
 Buddy's responses are always positive, unharmful, safe, creative, high-quality, human-like, and interesting.
 Buddy must always be safe and unharmful to humans.
 Buddy strictly refuses to discuss harmful, political, NSFW, illegal, abusive, offensive, or other sensitive topics.'''
 prompt = system_prompt + "\n\nUser: Write a poem about yourself.\nAssistant:"
 print(pipe(prompt))
 ```
 ## 免责声明
 所有OpenBuddy模型均存在固有的局限性，可能产生错误的、有害的、冒犯性的或其他不良的输出。用户在关键或高风险场景中应谨慎行事，不要使用这些模型，以免导致人身伤害、财产损失或重大损失。此类场景的例子包括但不限于医疗领域、可能导致伤害的软硬件系统的控制以及进行重要的财务或法律决策。
 OpenBuddy按“原样”提供，不附带任何种类的明示或暗示的保证，包括但不限于适销性、特定目的的适用性和非侵权的暗示保证。在任何情况下，作者、贡献者或版权所有者均不对因本软件、本软件的使用或与本软件相关的其他交易而产生的任何索赔、损害赔偿或其他责任（无论是合同、侵权还是其他原因）承担责任。
 使用OpenBuddy即表示您同意这些条款和条件，并承认您了解其使用可能带来的潜在风险。您还同意赔偿并使作者、贡献者和版权所有者免受因您使用OpenBuddy而产生的任何索赔、损害赔偿或责任的影响。
--- a/config.json
+++ b/config.json
@@ -0,0 +1,33 @@
 {
  "_name_or_path": "openbuddy-falcon-7b-v1.5",
  "alibi": false,
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "RWForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_RW.RWConfig",
    "AutoModel": "modelling_RW.RWModel",
    "AutoModelForCausalLM": "modelling_RW.RWForCausalLM",
    "AutoModelForQuestionAnswering": "modelling_RW.RWForQuestionAnswering",
    "AutoModelForSequenceClassification": "modelling_RW.RWForSequenceClassification",
    "AutoModelForTokenClassification": "modelling_RW.RWForTokenClassification"
  },
  "bias": false,
  "bos_token_id": 11,
  "eos_token_id": 11,
  "hidden_dropout": 0.0,
  "hidden_size": 4544,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "RefinedWebModel",
  "multi_query": true,
  "n_head": 71,
  "n_layer": 32,
  "parallel_attn": true,
  "torch_dtype": "float16",
  "transformers_version": "4.29.2",
  "use_cache": false,
  "vocab_size": 70144
 }
--- a/configuration.json
+++ b/configuration.json
@@ -0,0 +1,11 @@
 {
    "framework": "pytorch",
    "task": "text-generation",
    "model": {
        "type": "openbuddy-falcon-7b-v1-5-fp16"
    },
    "pipeline": {
        "type": "openbuddy-falcon-7b-v1-5-fp16-text-generation-pipe"
    },
    "allow_remote": true
 }
--- a/configuration_RW.py
+++ b/configuration_RW.py
@@ -0,0 +1,79 @@
 # coding=utf-8
 # Copyright 2022 the Big Science Workshop and HuggingFace Inc. team.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """RefinedWebModel (RW) configuration."""
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 # Module-level logger obtained through the transformers logging helper.
 logger = logging.get_logger(__name__)
 class RWConfig(PretrainedConfig):
    """Configuration container for the ``RefinedWebModel`` (RW) architecture.

    Every constructor argument is stored verbatim as an attribute of the
    same name. How each value is interpreted is decided by the modelling
    code, which is not part of this file.
    """
    model_type = "RefinedWebModel"
    keys_to_ignore_at_inference = ["past_key_values"]
    # Generic attribute names exposed as aliases of the names stored here;
    # the mapping is consumed by the ``PretrainedConfig`` base class.
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
    }
    def __init__(
        self,
        vocab_size=250880,
        hidden_size=64,
        n_layer=2,
        n_head=8,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        apply_residual_connection_post_layernorm=False,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        multi_query=False,
        alibi=False,
        bias=False,
        parallel_attn=False,
        **kwargs,
    ):
        """Store the hyper-parameters and hand the remainder to the base class.

        A legacy ``n_embed`` keyword, when present and not ``None``, takes
        precedence over ``hidden_size``. It is removed from ``kwargs``
        before the remaining keywords are forwarded to
        ``PretrainedConfig.__init__`` together with the two token ids.
        """
        # Backward compatibility: older configs spell the width ``n_embed``.
        legacy_width = kwargs.pop("n_embed", None)
        if legacy_width is not None:
            hidden_size = legacy_width
        # Sizes.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.n_layer = n_layer
        self.n_head = n_head
        # Numerical and initialisation settings.
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        # Behaviour switches.
        self.use_cache = use_cache
        self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
        self.multi_query = multi_query
        self.alibi = alibi
        self.bias = bias
        self.parallel_attn = parallel_attn
        # Special token ids are set here and also passed on to the base class.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
    @property
    def head_dim(self):
        """Return ``hidden_size`` floor-divided by ``n_head``."""
        width, heads = self.hidden_size, self.n_head
        return width // heads
    @property
    def rotary(self):
        """Return the logical negation of ``alibi``."""
        return False if self.alibi else True
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,6 @@
 {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.29.2"
 }
--- a/modelling_RW.py
+++ b/modelling_RW.py
--- a/ms_wrapper.py
+++ b/ms_wrapper.py
@@ -0,0 +1,65 @@
 import os
 from typing import Any, Dict, Union
 import torch
 from modelscope.models.base import Model, TorchModel
 from modelscope.models.builder import MODELS
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.utils.constant import Tasks
 from modelscope.utils.logger import get_logger
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # Import-time, process-wide side effect: unconditionally sets
 # CUDA_VISIBLE_DEVICES to "0", replacing any value the caller exported.
 # NOTE(review): this runs after `import torch`; presumably it still takes
 # effect because CUDA is initialised lazily -- confirm.
 os.environ['CUDA_VISIBLE_DEVICES'] = "0"
@PIPELINES.register_module(Tasks.text_generation, module_name='openbuddy-falcon-7b-v1-5-fp16-text-generation-pipe')
 class openbuddyfalcon7bv15fp16TextGenerationPipeline(Pipeline):
    def __init__(
            self,
            model: Union[Model, str],
            *args,
            **kwargs):
        model = openbuddyfalcon7bv15fp16TextGeneration(model) if isinstance(model, str) else model
        super().__init__(model=model, **kwargs)
    def preprocess(self, inputs, **preprocess_params) -> Dict[str, Any]:
        return inputs
    # define the forward pass
    def forward(self, inputs: Dict, **forward_params) -> Dict[str, Any]:
        return self.model(inputs)
    # format the outputs from pipeline
    def postprocess(self, input, **kwargs) -> Dict[str, Any]:
        return input
@MODELS.register_module(Tasks.text_generation, module_name='openbuddy-falcon-7b-v1-5-fp16')
 class openbuddyfalcon7bv15fp16TextGeneration(TorchModel):
    def __init__(self, model_dir=None, *args, **kwargs):
        super().__init__(model_dir, *args, **kwargs)
        self.logger = get_logger()
        # loading tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
        self.model = self.model.eval()
    def forward(self,input: Dict, *args, **kwargs) -> Dict[str, Any]:
        output = {}
        res = self.infer(input)
        output['text'] = res
        return output
    def quantize(self, bits: int):
        self.model = self.model.quantize(bits)
        return self
    def infer(self, input):
        device = self.model.device
        input_ids = self.tokenizer.encode(input, return_tensors='pt').to(device)
        output_ids = self.model.generate(input_ids, max_length=2048)
        out = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        return out
--- a/pytorch_model-00001-of-00002.bin
+++ b/pytorch_model-00001-of-00002.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:2a9ec7042a9da5a7473098e42856790d085bfa928f247b246b63db5c32f0ce87
 size 9997556833
--- a/pytorch_model-00002-of-00002.bin
+++ b/pytorch_model-00002-of-00002.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b5d8a57278c936290b16cf3eca0c2b30957d2d2c5171e29c49ed8f2ebf521956
 size 4529951399
--- a/pytorch_model.bin.index.json
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,203 @@
 {
  "metadata": {
    "total_size": 14527440640
  },
  "weight_map": {
    "lm_head.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.0.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.0.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.10.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.11.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.12.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.13.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.14.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.15.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.16.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.17.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.18.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.19.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.2.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.20.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.21.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.22.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.22.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.22.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.22.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.22.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.23.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.23.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.23.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.23.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.24.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.25.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.26.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.27.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.28.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.29.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.3.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.3.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.3.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.3.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.3.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.30.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.30.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.30.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.30.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.30.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.input_layernorm.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.self_attention.dense.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.31.self_attention.query_key_value.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.h.4.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.4.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.4.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.4.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.4.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.5.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.6.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.7.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.8.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.input_layernorm.bias": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.self_attention.dense.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.h.9.self_attention.query_key_value.weight": "pytorch_model-00001-of-00002.bin",
    "transformer.ln_f.bias": "pytorch_model-00002-of-00002.bin",
    "transformer.ln_f.weight": "pytorch_model-00002-of-00002.bin",
    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00002.bin"
  }
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,16 @@
 {
  "additional_special_tokens": [
    ">>TITLE<<",
    ">>ABSTRACT<<",
    ">>INTRODUCTION<<",
    ">>SUMMARY<<",
    ">>COMMENT<<",
    ">>ANSWER<<",
    ">>QUESTION<<",
    ">>DOMAIN<<",
    ">>PREFIX<<",
    ">>SUFFIX<<",
    ">>MIDDLE<<"
  ],
  "eos_token": "<|endoftext|>"
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,7 @@
 {
  "add_prefix_space": false,
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "model_max_length": 2048,
  "tokenizer_class": "PreTrainedTokenizerFast"
 }