初始化项目，由ModelHub XC社区提供模型

Model: ayoolaolafenwa/ChatLM Source: Original Platform
2026-05-26 04:23:17 +08:00
commit 2464b344e2
12 changed files with 151953 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,145 @@
+---
+license: apache-2.0
+datasets:
+- ayoolaolafenwa/sft-data
+language:
+- en
+---
+
+## ChatLM 
+ChatLM is a chat large language model obtained by finetuning the pretrained [Falcon-1B model](https://huggingface.co/tiiuae/falcon-rw-1b)
+on the [chat-bot-instructions prompts dataset](https://huggingface.co/datasets/ayoolaolafenwa/sft-data).
+The dataset contains ordinary day-to-day human conversations. Because only a limited amount of data was used in training,
+ChatLM does not generalize well to tasks such as coding or current affairs, and hallucinations may occur.
+
+# Github Repo: https://github.com/ayoolaolafenwa/ChatLM
+
+# Have a live chat with ChatLM on space https://huggingface.co/spaces/ayoolaolafenwa/ChatLM
+
+# Install Required Packages
+```
+pip install transformers
+pip install accelerate
+pip install einops
+pip install bitsandbytes
+```
+
+## Load Model in bfloat16
+``` python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = "ayoolaolafenwa/ChatLM"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code = True,
+torch_dtype=torch.bfloat16).to("cuda")
+
+prompt = "<user>: Give me a financial advise on investing in stocks. <chatbot>: "
+
+tokens = tokenizer(prompt, return_tensors="pt")
+
+token_ids = tokens.input_ids
+attention_mask=tokens.attention_mask
+
+token_ids = token_ids.to(model.device)
+attention_mask=attention_mask.to(model.device)
+
+outputs = model.generate(input_ids=token_ids, attention_mask = attention_mask,  max_length=2048,do_sample=True,
+num_return_sequences=1,top_k = 10, temperature = 0.7, eos_token_id=tokenizer.eos_token_id)
+
+output_text = tokenizer.decode(outputs[0])
+output_text = output_text.replace("<|endoftext|>", "")
+
+print(output_text)
+```
+
+## Load Model in bfloat16 and int8
+``` python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_path = "ayoolaolafenwa/ChatLM"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code = True,
+torch_dtype=torch.bfloat16, load_in_8bit=True)
+
+prompt = "<user>: Give me a financial advise on investing in stocks. <chatbot>: "
+
+tokens = tokenizer(prompt, return_tensors="pt")
+
+token_ids = tokens.input_ids
+attention_mask=tokens.attention_mask
+
+token_ids = token_ids.to(model.device)
+attention_mask=attention_mask.to(model.device)
+
+outputs = model.generate(input_ids=token_ids, attention_mask = attention_mask,  max_length=2048,do_sample=True,
+num_return_sequences=1,top_k = 10, temperature = 0.7, eos_token_id=tokenizer.eos_token_id)
+
+output_text = tokenizer.decode(outputs[0])
+output_text = output_text.replace("<|endoftext|>", "")
+
+print(output_text)
+```
+# Training procedure for Supervised Finetuning
+
+## Dataset Preparation
+
+The Chatbot Instructions prompts dataset from https://huggingface.co/datasets/alespalla/chatbot_instruction_prompts/viewer/alespalla--chatbot_instruction_prompts
+was processed into a supervised finetuning format in which each training example pairs a user prompt with its corresponding response.
+
+##### Download Data
+``` python
+from datasets import load_dataset
+
+dataset = load_dataset("alespalla/chatbot_instruction_prompts", split = "train")
+dataset.save_to_disk('ChatBotInsP')
+dataset.to_csv('CIPtrain.csv')
+```
+
+##### Code to process dataset into Supervised finetuning format
+``` python
+# Import pandas library
+import pandas as pd
+
+# Read the text dataset from csv file
+text_data = pd.read_csv("CIPtrain.csv")
+
+# Create empty lists for prompts and responses
+prompts = []
+responses = []
+
+# Loop through the text data
+for i in range(len(text_data)):
+    # Get the prompt and response of the current row as strings
+    prompt = text_data["prompt"][i]
+    prompt = str(prompt)
+
+    response = text_data["response"][i]
+    response = str(response)
+    
+    # Add the message to the prompts list with <user> tag
+    prompts.append("<user>: " + prompt)
+    
+    # Add the message to the responses list with <chatbot> tag
+    responses.append("<chatbot>: " + response)
+
+# Create a new dataframe with prompts and responses columns
+new_data = pd.DataFrame({"prompt": prompts, "response": responses})
+
+#alespalla/chatbot_instruction_prompts
+# Write the new dataframe to a csv file
+new_data.to_csv("MyData/chatbot_instruction_prompts_train.csv", index=False)
+```
+Each user prompt in the dataset is prefixed with the tag `<user>: ` and the corresponding response with the tag `<chatbot>: `.
+Check the modified dataset at https://huggingface.co/datasets/ayoolaolafenwa/sft-data .
+
+### Training 
+
+ChatLM was produced by supervised finetuning of the pretrained [Falcon 1-Billion parameters model](https://huggingface.co/tiiuae/falcon-rw-1b), which was trained on 350 billion tokens
+of RefinedWeb. ChatLM was finetuned on a single H100 GPU for 1 epoch and achieves a perplexity of *1.738*. The full supervised finetuning code
+is available in its GitHub repository: https://github.com/ayoolaolafenwa/ChatLM/tree/main
--- a/config.json
+++ b/config.json
@@ -0,0 +1,33 @@
+{
+  "alibi": true,
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "FalconForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_falcon.FalconConfig",
+    "AutoModel": "modeling_falcon.FalconModel",
+    "AutoModelForSequenceClassification": "modeling_falcon.FalconForSequenceClassification",
+    "AutoModelForTokenClassification": "modeling_falcon.FalconForTokenClassification",
+    "AutoModelForQuestionAnswering": "modeling_falcon.FalconForQuestionAnswering",
+    "AutoModelForCausalLM": "modeling_falcon.FalconForCausalLM"
+  },
+  "bias": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "falcon",
+  "multi_query": false,
+  "new_decoder_architecture": false,
+  "num_attention_heads": 32,
+  "num_hidden_layers": 24,
+  "parallel_attn": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.27.4",
+  "use_cache": true,
+  "vocab_size": 50304
+}
--- a/configuration_falcon.py
+++ b/configuration_falcon.py
@@ -0,0 +1,147 @@
+# coding=utf-8
+# Copyright 2023 the Falcon authors and HuggingFace Inc. team.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Falcon configuration"""
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+# Maps Falcon checkpoint names to the URL of their hosted `config.json`.
+FALCON_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+    "tiiuae/falcon-40b": "https://huggingface.co/tiiuae/falcon-40b/resolve/main/config.json",
+    "tiiuae/falcon-7b": "https://huggingface.co/tiiuae/falcon-7b/resolve/main/config.json",
+}
+
+
+class FalconConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`FalconModel`]. It is used to instantiate a Falcon
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
+    defaults will yield a similar configuration to that of the
+    [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b) architecture.
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 65024):
+            Vocabulary size of the Falcon model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`FalconModel`]
+        hidden_size (`int`, *optional*, defaults to 4544):
+            Dimension of the hidden representations. Overridden by the legacy `n_embed` keyword argument when that is
+            supplied.
+        num_hidden_layers (`int`, *optional*, defaults to 32):
+            Number of hidden layers in the Transformer decoder.
+        num_attention_heads (`int`, *optional*, defaults to 71):
+            Number of attention heads for each attention layer in the Transformer decoder.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether the model should return the last key/values attentions (not used by all models). Only relevant if
+            `config.is_decoder=True`.
+        layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
+            The epsilon used by the layer normalization layers.
+        hidden_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout probability for MLP layers.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout probability for attention layers.
+        num_kv_heads (`int`, *optional*):
+            Number of key-value heads to use per attention layer. If unset, defaults to the same value as
+            `num_attention_heads`.
+        alibi (`bool`, *optional*, defaults to `False`):
+            Whether to use ALiBi positional biases during self-attention.
+        new_decoder_architecture (`bool`, *optional*, defaults to `False`):
+            Whether to use the new (Falcon-40B) decoder architecture. If `True`, the `multi_query` and `parallel_attn`
+            arguments are ignored, as the new decoder always uses parallel attention.
+        multi_query (`bool`, *optional*, defaults to `True`):
+            Whether to use multi-query attention in the decoder. Ignored when `new_decoder_architecture` is `True`.
+        parallel_attn (`bool`, *optional*, defaults to `True`):
+            Whether to compute attention in parallel with the feedforward layer. If False, they are consecutive
+            instead, as in the original Transformer architecture. Ignored when `new_decoder_architecture` is `True`.
+        bias (`bool`, *optional*, defaults to `False`):
+            Whether to use bias on Linear layers.
+        bos_token_id (`int`, *optional*, defaults to 11):
+            The id of the "beginning-of-sequence" token.
+        eos_token_id (`int`, *optional*, defaults to 11):
+            The id of the "end-of-sequence" token.
+        kwargs:
+            Additional keyword arguments. A legacy `n_embed` entry, if present, replaces `hidden_size`; all remaining
+            entries are forwarded to the [`PretrainedConfig`] constructor.
+
+    Example:
+
+    ```python
+    >>> from transformers import FalconModel, FalconConfig
+
+    >>> # Initializing a small (2-layer) Falcon configuration
+    >>> configuration = FalconConfig(num_hidden_layers=2)
+
+    >>> # Initializing a model from the small configuration
+    >>> model = FalconModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "falcon"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size=65024,
+        hidden_size=4544,
+        num_hidden_layers=32,
+        num_attention_heads=71,
+        layer_norm_epsilon=1e-5,
+        initializer_range=0.02,
+        use_cache=True,
+        hidden_dropout=0.0,
+        attention_dropout=0.0,
+        num_kv_heads=None,
+        alibi=False,
+        new_decoder_architecture=False,
+        multi_query=True,
+        parallel_attn=True,
+        bias=False,
+        bos_token_id=11,
+        eos_token_id=11,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        # Backward compatibility with n_embed kwarg: when supplied it takes precedence over
+        # `hidden_size`, and it is popped so it is not forwarded to the parent constructor.
+        n_embed = kwargs.pop("n_embed", None)
+        self.hidden_size = hidden_size if n_embed is None else n_embed
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.initializer_range = initializer_range
+        self.use_cache = use_cache
+        self.hidden_dropout = hidden_dropout
+        self.attention_dropout = attention_dropout
+
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        # Fall back to one key-value head per attention head when num_kv_heads is unset.
+        self.num_kv_heads = num_attention_heads if num_kv_heads is None else num_kv_heads
+        self.alibi = alibi
+        self.new_decoder_architecture = new_decoder_architecture
+        self.multi_query = multi_query  # Ignored when new_decoder_architecture is True
+        self.parallel_attn = parallel_attn
+        self.bias = bias
+
+        # The token ids and every remaining keyword argument are handed to PretrainedConfig.
+        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+
+    @property
+    def head_dim(self):
+        """Return `hidden_size // num_attention_heads` (integer division)."""
+        return self.hidden_size // self.num_attention_heads
+
+    @property
+    def rotary(self):
+        """Return `True` exactly when `alibi` is `False`.
+
+        NOTE(review): presumably signals that rotary position embeddings are used instead of ALiBi; confirm
+        against the modeling code that reads this property.
+        """
+        return not self.alibi
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.31.0.dev0"
+}
--- a/merges.txt
+++ b/merges.txt
--- a/modeling_falcon.py
+++ b/modeling_falcon.py
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2ee34512fceb92d9fdc5ea788d6467dda83bf62b261189627439b6410132d8
+size 5246595929
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,9 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
--- a/vocab.json
+++ b/vocab.json