初始化项目，由ModelHub XC社区提供模型

Model: GeneZC/MiniChat-1.5-3B Source: Original Platform
2026-05-14 02:12:20 +08:00
commit f14ec20fde
10 changed files with 543 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,188 @@
 ---
 language:
 - en
 - zh
 license: apache-2.0
 library_name: transformers
 widget:
 - text: <s> [|User|] Hi 👋  </s>[|Assistant|]
 model-index:
 - name: MiniChat-1.5-3B
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 46.5
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 68.28
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 46.67
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 50.71
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 65.04
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 24.18
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=GeneZC/MiniChat-1.5-3B
      name: Open LLM Leaderboard
 ---
 ## MiniChat-1.5-3B
 📑 [arXiv](https://arxiv.org/abs/2311.07052) | 👻 [GitHub](https://github.com/GeneZC/MiniMA) | 🤗 [HuggingFace-MiniMA](https://huggingface.co/GeneZC/MiniMA-3B) | 🤗 [HuggingFace-MiniChat](https://huggingface.co/GeneZC/MiniChat-3B) | 🤗 [HuggingFace-MiniChat-1.5](https://huggingface.co/GeneZC/MiniChat-1.5-3B) | 🤖 [ModelScope-MiniMA](https://modelscope.cn/models/GeneZC/MiniMA-3B) | 🤖 [ModelScope-MiniChat](https://modelscope.cn/models/GeneZC/MiniChat-3B)
 🆕 **Updates from MiniChat-3B**: 
 - better data mixture;
 - use of [NEFTune](https://arxiv.org/abs/2310.05914);
 - use of [DPO](https://arxiv.org/abs/2305.18290).
 ❗ Use of this model must comply with the LLaMA2 license, since the model is derived from LLaMA2.
 MiniChat-1.5-3B is a language model distilled and finetuned from an adapted version of LLaMA2-7B, following "Towards the Law of Capacity Gap in Distilling Language Models".
 It outperforms a wide range of 3B competitors in GPT4 evaluation and even competes with several 7B chat models.
 <img src="./teaser_b.jpg" alt="teaser_b" width="687" />
 The following is an example code snippet to use MiniChat-1.5-3B:
 ```python
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from conversation import get_default_conv_template
 # MiniChat-1.5
 tokenizer = AutoTokenizer.from_pretrained("GeneZC/MiniChat-1.5-3B", use_fast=False)
 # GPU.
 model = AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-1.5-3B", use_cache=True, device_map="auto", torch_dtype=torch.float16).eval()
 # CPU.
 # model = AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-1.5-3B", use_cache=True, device_map="cpu", torch_dtype=torch.float16).eval()
 conv = get_default_conv_template("minichat")
 question = "Implement a program to find the common elements in two arrays without using any extra data structures."
 conv.append_message(conv.roles[0], question)
 conv.append_message(conv.roles[1], None)
 prompt = conv.get_prompt()
 input_ids = tokenizer([prompt]).input_ids
 output_ids = model.generate(
    torch.as_tensor(input_ids).cuda(),
    do_sample=True,
    temperature=0.7,
    max_new_tokens=1024,
 )
 output_ids = output_ids[0][len(input_ids[0]):]
 output = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
 # output: "def common_elements(arr1, arr2):\n    if len(arr1) == 0:\n        return []\n    if len(arr2) == 0:\n        return arr1\n\n    common_elements = []\n    for element in arr1:\n        if element in arr2:\n            common_elements.append(element)\n\n    return common_elements"
 # A multi-turn conversation can be carried on by appending each new question (and the model's previous answer) to `conv`.
 ```
 ## Bibtex
 ```bibtex
@article{zhang2023law,
    title={Towards the Law of Capacity Gap in Distilling Language Models},
    author={Zhang, Chen and Song, Dawei and Ye, Zheyu and Gao, Yan},
    year={2023},
    url={https://arxiv.org/abs/2311.07052}
 }
 ```
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_GeneZC__MiniChat-1.5-3B)
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |50.23|
 |AI2 Reasoning Challenge (25-Shot)|46.50|
 |HellaSwag (10-Shot)              |68.28|
 |MMLU (5-Shot)                    |46.67|
 |TruthfulQA (0-shot)              |50.71|
 |Winogrande (5-shot)              |65.04|
 |GSM8k (5-shot)                   |24.18|
--- a/config.json
+++ b/config.json
@@ -0,0 +1,27 @@
 {
  "_name_or_path": "MiniChat-1.5-DPO-3B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 24,
  "num_key_value_heads": 24,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.33.2",
  "use_cache": true,
  "vocab_size": 49216
 }
--- a/conversation.py
+++ b/conversation.py
@@ -0,0 +1,223 @@
 """
 Conversation prompt templates.
 """
 import dataclasses
 from enum import auto, Enum
 from typing import List, Tuple, Any
 class SeparatorStyle(Enum):
    """Prompt layouts that ``Conversation.get_prompt`` can render."""
    # Values are spelled out explicitly; they equal what ``auto()`` assigns
    # (consecutive integers starting at 1, in declaration order).
    ADD_COLON_SINGLE = 1
    ADD_COLON_TWO = 2
    NO_COLON_SINGLE = 3
    BAIZE = 4
    PHOENIX = 5
    MINICHAT = 6
@dataclasses.dataclass
class Conversation:
    """Prompt template plus the running message history of one conversation."""

    # System prompt that starts every rendered prompt
    system: str
    # The two role names
    roles: List[str]
    # Message history, one [role, message] entry per turn
    messages: List[List[str]]
    # Offset of the few-shot examples within ``messages``
    offset: int
    # Separator configuration
    sep_style: SeparatorStyle
    sep: str
    sep2: str = None
    # Stop string (the EOS token is the default stop criterion)
    stop_str: str = None
    # Generation stops when any token id in this list is met
    stop_token_ids: List[int] = None

    # State used by the gradio servers.
    # TODO(lmzheng): refactor this
    conv_id: Any = None
    skip_next: bool = False
    model_name: str = None

    def get_prompt(self):
        """Render the system prompt and all messages into one prompt string.

        The layout is chosen by ``sep_style``. A message that is empty or
        ``None`` contributes only its role prefix, leaving that turn open.

        Raises:
            ValueError: if ``sep_style`` is not one of the handled styles.
        """
        style = self.sep_style

        if style == SeparatorStyle.ADD_COLON_SINGLE:
            # "<role>: <message><sep>" after "<system><sep>".
            prompt = self.system + self.sep
            for speaker, text in self.messages:
                prompt += (speaker + ": " + text + self.sep) if text else (speaker + ": ")
            return prompt

        if style == SeparatorStyle.ADD_COLON_TWO:
            # Like the single-separator layout, but turns alternate sep / sep2.
            alternating = [self.sep, self.sep2]
            prompt = self.system + alternating[0]
            for turn, (speaker, text) in enumerate(self.messages):
                prompt += (speaker + ": " + text + alternating[turn % 2]) if text else (speaker + ": ")
            return prompt

        if style == SeparatorStyle.NO_COLON_SINGLE:
            # Role and message are joined directly, each turn closed by sep.
            prompt = self.system
            for speaker, text in self.messages:
                prompt += (speaker + text + self.sep) if text else speaker
            return prompt

        if style == SeparatorStyle.BAIZE:
            # Newline after the system prompt and after every completed turn.
            prompt = self.system + "\n"
            for speaker, text in self.messages:
                prompt += (speaker + text + "\n") if text else speaker
            return prompt

        if style == SeparatorStyle.PHOENIX:
            # Every message is wrapped in "<s>" ... "</s>".
            prompt = self.system
            for speaker, text in self.messages:
                prompt += (speaker + ": " + "<s>" + text + "</s>") if text else (speaker + ": " + "<s>")
            return prompt

        if style == SeparatorStyle.MINICHAT:
            # "<role> <message></s>"; an open turn is the bare role, no space.
            prompt = self.system
            for speaker, text in self.messages:
                prompt += (speaker + " " + text + "</s>") if text else speaker
            return prompt

        raise ValueError(f"Invalid style: {self.sep_style}")

    def append_message(self, role, message):
        """Add one ``[role, message]`` entry to the end of the history."""
        self.messages.append([role, message])

    def to_gradio_chatbot(self):
        """Group the messages after ``offset`` into two-element lists.

        Each even-positioned message opens a ``[message, None]`` pair and the
        message that follows it, if any, fills the second slot.
        """
        pairs = []
        for position, (_, text) in enumerate(self.messages[self.offset:]):
            if position % 2:
                pairs[-1][-1] = text
            else:
                pairs.append([text, None])
        return pairs

    def to_openai_api_messages(self):
        """Convert the history after ``offset`` into role/content dicts.

        The list starts with the system prompt. Even-positioned messages are
        labelled "user"; odd-positioned ones are labelled "assistant" and are
        skipped when their content is ``None``.
        """
        api_messages = [{"role": "system", "content": self.system}]
        for position, (_, text) in enumerate(self.messages[self.offset:]):
            if position % 2 == 0:
                api_messages.append({"role": "user", "content": text})
            elif text is not None:
                api_messages.append({"role": "assistant", "content": text})
        return api_messages

    def copy(self):
        """Return a new Conversation whose message list is rebuilt entry by entry.

        Every other field is passed through as-is; ``skip_next`` is not
        forwarded, so the copy gets its default value.
        """
        history = [[role, text] for role, text in self.messages]
        fields = {
            "system": self.system,
            "roles": self.roles,
            "messages": history,
            "offset": self.offset,
            "sep_style": self.sep_style,
            "sep": self.sep,
            "sep2": self.sep2,
            "stop_str": self.stop_str,
            "stop_token_ids": self.stop_token_ids,
            "conv_id": self.conv_id,
            "model_name": self.model_name,
        }
        return Conversation(**fields)

    def dict(self):
        """Return the system prompt, roles, messages, offset, conv_id and model_name as a dict."""
        exported = ("system", "roles", "messages", "offset", "conv_id", "model_name")
        return {key: getattr(self, key) for key in exported}
 # Built-in conversation templates.
 # The templates keep `messages` (and `roles`) as tuples. Conversation.copy()
 # rebuilds `messages` as a list of lists, so append_message() works on a copy
 # but not on these module-level objects themselves.

 # Vicuna: ADD_COLON_TWO renders "ROLE: message" turns that alternate between
 # `sep` (" ") and `sep2` ("</s>") as the closing separator.
 conv_vicuna = Conversation(
    system="A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions.",
    roles=("USER", "ASSISTANT"),
    messages=(),
    offset=0,
    sep_style=SeparatorStyle.ADD_COLON_TWO,
    sep=" ",
    sep2="</s>",
 )
 # Baize: seeded with two example messages; offset=2 makes to_gradio_chatbot()
 # and to_openai_api_messages() skip them. The role markers double as the
 # stop string ("[|Human|]").
 conv_baize = Conversation(
    system="The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n",
    roles=("[|Human|]", "[|AI|]"),
    messages=(
        ("[|Human|]", "Hello!"),
        ("[|AI|]", "Hi!"),
    ),
    offset=2,
    sep_style=SeparatorStyle.BAIZE,
    sep="\n",
    stop_str="[|Human|]",
 )
 # Phoenix: the PHOENIX branch of get_prompt() hard-codes the "<s>"/"</s>"
 # wrapping and does not read `sep`.
 conv_phoenix = Conversation(
    system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
    roles=("Human", "Assistant"),
    messages=(),
    offset=0,
    sep_style=SeparatorStyle.PHOENIX,
    sep="</s>",
 )
 # ChatGPT: sep_style is None, so get_prompt() raises ValueError for this
 # template. NOTE(review): presumably meant to be used only through
 # to_openai_api_messages() -- confirm.
 conv_chatgpt = Conversation(
    system="You are a helpful assistant.",
    roles=("user", "assistant"),
    messages=(),
    offset=0,
    sep_style=None,
    sep=None,
 )
 # MiniChat: the MINICHAT branch of get_prompt() renders each turn as
 # role + " " + message + "</s>" (hard-coded, `sep` is not read); the system
 # prompt itself already ends with "</s>".
 # English gloss of the system prompt: "'MiniChat' is an AI language model
 # developed by 'Beccurio'. Below is a conversation between a human and
 # MiniChat. MiniChat's replies should be as detailed as possible and be
 # output in Markdown. MiniChat should refuse to take part in discussions
 # that violate ethics."
 conv_minichat = Conversation(
    system="‘MiniChat’是一个由‘Beccurio’开发的AI语言模型。下面是人类和MiniChat之间的一段对话。MiniChat的回复应当尽可能详细，并且以Markdown的形式输出。MiniChat应当拒绝参与违背伦理的讨论。</s>",
    roles=("[|User|]", "[|Assistant|]"),
    messages=(),
    offset=0,
    sep_style=SeparatorStyle.MINICHAT,
    sep="</s>",
 )
 # Registry used by get_default_conv_template(); keys are lowercase names.
 conv_templates = {
    "vicuna": conv_vicuna,
    "baize": conv_baize,
    "phoenix": conv_phoenix,
    "chatgpt": conv_chatgpt,
    "minichat": conv_minichat,
 }
 def get_default_conv_template(model_name):
    """Return a fresh copy of the conversation template registered for a model.

    Args:
        model_name: Template key; it is lowercased before the lookup in
            ``conv_templates``.

    Returns:
        The result of ``copy()`` on the registered template, so callers do
        not append messages to the shared module-level object.

    Raises:
        NotImplementedError: if no template is registered under that name.
    """
    model_name = model_name.lower()
    try:
        template = conv_templates[model_name]
    except KeyError as err:
        # Only a missing key means "unsupported model". The previous bare
        # `except:` also covered copy(), hiding unrelated failures (and
        # KeyboardInterrupt/SystemExit) behind this message.
        raise NotImplementedError(f"No support for model {model_name}.") from err
    return template.copy()
 if __name__ == "__main__":
    # Smoke test: build a one-question MiniChat prompt with an open assistant
    # turn and print it wrapped in a list, so the raw string is shown via repr.
    demo = conv_templates["minichat"].copy()
    question = "Write a Python function that checks if a given number is even or odd."
    demo.append_message(demo.roles[0], question)
    demo.append_message(demo.roles[1], None)
    rendered = demo.get_prompt()
    print([rendered])
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:268f61b9bbd207a8a9f5303462a3cecc161b819223e21353dca008786c8b1ca5
 size 6040910600
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:6af991bdb553810f86f2ac2cbf964bafdc111e1239c63f74ca4e2dabdddafb30
 size 6040956605
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
 {
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/teaser_b.jpg
+++ b/teaser_b.jpg
--- a/tokenizer.model
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:ae87c0db2b21b0fa3fdc5e19d1f9cea94efb703cc7c6281d8718a6714b3cc2be
 size 748869
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,38 @@
 {
  "add_bos_token": true,
  "add_eos_token": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "legacy": null,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": null,
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "use_default_system_prompt": true,
  "chat_template": "{{ '‘MiniChat’是一个由‘Beccurio’开发的AI语言模型。下面是人类和MiniChat之间的一段对话。MiniChat的回复应当尽可能详细，并且以Markdown的形式输出。MiniChat应当拒绝参与违背伦理的讨论。</s>' }}{% for message in messages %}{{'[|' + message['role'].capitalize() + '|] ' + message['content'] + '</s>'}}{% endfor %}{{ '[|Assistant|]' }}",
  "use_fast": true
 }