From 5aea4b8bd60b070a0be960d92f1c759e90915434 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 28 Apr 2026 22:16:51 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: afrideva/MiniChat-2-3B-GGUF Source: Original Platform --- .gitattributes | 42 ++++++++++++ README.md | 137 ++++++++++++++++++++++++++++++++++++++ minichat-2-3b.fp16.gguf | 3 + minichat-2-3b.q2_k.gguf | 3 + minichat-2-3b.q3_k_m.gguf | 3 + minichat-2-3b.q4_k_m.gguf | 3 + minichat-2-3b.q5_k_m.gguf | 3 + minichat-2-3b.q6_k.gguf | 3 + minichat-2-3b.q8_0.gguf | 3 + 9 files changed, 200 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 minichat-2-3b.fp16.gguf create mode 100644 minichat-2-3b.q2_k.gguf create mode 100644 minichat-2-3b.q3_k_m.gguf create mode 100644 minichat-2-3b.q4_k_m.gguf create mode 100644 minichat-2-3b.q5_k_m.gguf create mode 100644 minichat-2-3b.q6_k.gguf create mode 100644 minichat-2-3b.q8_0.gguf diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..979899f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,42 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs 
diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.fp16.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q2_k.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q6_k.gguf filter=lfs diff=lfs merge=lfs -text +minichat-2-3b.q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..f9ed8dc --- /dev/null +++ b/README.md @@ -0,0 +1,137 @@ +--- +base_model: GeneZC/MiniChat-2-3B +inference: false +language: +- en +- zh +library_name: transformers +license: apache-2.0 +model_creator: GeneZC +model_name: MiniChat-2-3B +pipeline_tag: text-generation +quantized_by: afrideva +tags: +- gguf +- ggml +- quantized +- q2_k +- q3_k_m +- q4_k_m +- q5_k_m +- q6_k +- q8_0 +widget: +- text: " [|User|] Hi \U0001F44B [|Assistant|]" +--- +# GeneZC/MiniChat-2-3B-GGUF + +Quantized GGUF model files for [MiniChat-2-3B](https://huggingface.co/GeneZC/MiniChat-2-3B) from [GeneZC](https://huggingface.co/GeneZC) + + +| 
Name | Quant method | Size | +| ---- | ---- | ---- | +| [minichat-2-3b.fp16.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.fp16.gguf) | fp16 | 6.04 GB | +| [minichat-2-3b.q2_k.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q2_k.gguf) | q2_k | 1.30 GB | +| [minichat-2-3b.q3_k_m.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q3_k_m.gguf) | q3_k_m | 1.51 GB | +| [minichat-2-3b.q4_k_m.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q4_k_m.gguf) | q4_k_m | 1.85 GB | +| [minichat-2-3b.q5_k_m.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q5_k_m.gguf) | q5_k_m | 2.15 GB | +| [minichat-2-3b.q6_k.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q6_k.gguf) | q6_k | 2.48 GB | +| [minichat-2-3b.q8_0.gguf](https://huggingface.co/afrideva/MiniChat-2-3B-GGUF/resolve/main/minichat-2-3b.q8_0.gguf) | q8_0 | 3.21 GB | + + + +## Original Model Card: +## MiniChat-2-3B + +📑 [arXiv](https://arxiv.org/abs/2311.07052) | 👻 [GitHub](https://github.com/GeneZC/MiniMA) | 🤗 [HuggingFace-MiniMA](https://huggingface.co/GeneZC/MiniMA-3B) | 🤗 [HuggingFace-MiniChat](https://huggingface.co/GeneZC/MiniChat-3B) | 🤖 [ModelScope-MiniMA](https://modelscope.cn/models/GeneZC/MiniMA-3B) | 🤖 [ModelScope-MiniChat](https://modelscope.cn/models/GeneZC/MiniChat-3B) | 🤗 [HuggingFace-MiniChat-1.5](https://huggingface.co/GeneZC/MiniChat-1.5-3B) | 🤗 [HuggingFace-MiniMA-2](https://huggingface.co/GeneZC/MiniMA-2-3B) | 🤗 [HuggingFace-MiniChat-2](https://huggingface.co/GeneZC/MiniChat-2-3B) + +🆕 **Updates from MiniChat-3B**: +- better base model MiniMA-2-3B; +- better data mixture; +- use of [NEFTune](https://arxiv.org/abs/2310.05914); +- use of [DPO](https://arxiv.org/abs/2305.18290). + +❗ Must comply with LICENSE of LLaMA2 since it is derived from LLaMA2. 
+ +A language model continued from MiniMA-2-3B and finetuned on both instruction and preference data. + +Surpassing Vicuna-7B and approximating LLaMA-2-Chat-7B on MT-Bench. + +teaser_b + +**Standard Benchmarks** + +|Method|TFLOPs|MMLU (5-shot)|CEval (5-shot)|DROP (3-shot)|HumanEval (0-shot)|BBH (3-shot)|GSM8K (8-shot)| +|--|--|--|--|--|--|--|--| +|Mamba-2.8B|4.6E9|25.58|24.74|15.72|7.32|29.37|3.49| +|ShearedLLaMA-2.7B|0.8E9|26.97|22.88|19.98|4.88|30.48|3.56| +|BTLM-3B|11.3E9|27.20|26.00|17.84|10.98|30.87|4.55| +|StableLM-3B|72.0E9|44.75|31.05|22.35|15.85|32.59|10.99| +|Qwen-1.8B|23.8E9|44.05|54.75|12.97|14.02|30.80|22.97| +|Phi-2-2.8B|159.9E9|56.74|34.03|30.74|46.95|44.13|55.42| +|LLaMA-2-7B|84.0E9|46.00|34.40|31.57|12.80|32.02|14.10| +|| +|MiniMA-3B|4.0E9|28.51|28.23|22.50|10.98|31.61|8.11| +|MiniChat-3B|4.0E9|38.40|36.48|22.58|18.29|31.36|29.72| +|MiniMA-2-3B|13.4E9|40.14|44.65|23.10|14.63|31.43|8.87| +|MiniChat-2-3B|13.4E9|46.17|43.91|30.26|22.56|34.95|38.13| + +**Instruction-following Benchmarks** + +|Method|AlpacaEval|MT-Bench| +|--|--|--| +|GPT-4|95.28|9.18| +|Zephyr-7B-Beta|90.60|7.34| +|Phi-2-DPO|81.37|-| +|StableLM Zephyr 3B|76.00|6.64| +|Vicuna-7B|76.84|6.17| +|LLaMA-2-Chat-7B|71.37|6.27| +|| +|MiniChat-3B|48.82|-| +|MiniChat-2-3B|77.30|6.23| + +The following is an example code snippet to use MiniChat-2-3B: + +```python +import torch + +from transformers import AutoModelForCausalLM, AutoTokenizer + +from conversation import get_default_conv_template + +# MiniChat +tokenizer = AutoTokenizer.from_pretrained("GeneZC/MiniChat-2-3B", use_fast=False) +# GPU. +model = AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-2-3B", use_cache=True, device_map="auto", torch_dtype=torch.float16).eval() +# CPU. 
+# model = AutoModelForCausalLM.from_pretrained("GeneZC/MiniChat-2-3B", use_cache=True, device_map="cpu", torch_dtype=torch.float16).eval() + +conv = get_default_conv_template("minichat") + +question = "Implement a program to find the common elements in two arrays without using any extra data structures." +conv.append_message(conv.roles[0], question) +conv.append_message(conv.roles[1], None) +prompt = conv.get_prompt() +input_ids = tokenizer([prompt]).input_ids +output_ids = model.generate( + torch.as_tensor(input_ids).cuda(), + do_sample=True, + temperature=0.7, + max_new_tokens=1024, +) +output_ids = output_ids[0][len(input_ids[0]):] +output = tokenizer.decode(output_ids, skip_special_tokens=True).strip() +# output: "def common_elements(arr1, arr2):\n if len(arr1) == 0:\n return []\n if len(arr2) == 0:\n return arr1\n\n common_elements = []\n for element in arr1:\n if element in arr2:\n common_elements.append(element)\n\n return common_elements" +# Multi-turn conversation can be realized by repeatedly appending new questions to `conv`. 
+``` + +## Bibtex + +```bibtex +@article{zhang2023law, + title={Towards the Law of Capacity Gap in Distilling Language Models}, + author={Zhang, Chen and Song, Dawei and Ye, Zheyu and Gao, Yan}, + year={2023}, + url={https://arxiv.org/abs/2311.07052} +} +``` \ No newline at end of file diff --git a/minichat-2-3b.fp16.gguf b/minichat-2-3b.fp16.gguf new file mode 100644 index 0000000..461b9cb --- /dev/null +++ b/minichat-2-3b.fp16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fada3eb966a18d896a59597113f2b687b9d9b3c1290898ebb8635f0ef11954 +size 6042292800 diff --git a/minichat-2-3b.q2_k.gguf b/minichat-2-3b.q2_k.gguf new file mode 100644 index 0000000..bffe775 --- /dev/null +++ b/minichat-2-3b.q2_k.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d294775369399c5abd5a36e5e08b9cb07f459975f839947b5836d4facd96b12a +size 1297187936 diff --git a/minichat-2-3b.q3_k_m.gguf b/minichat-2-3b.q3_k_m.gguf new file mode 100644 index 0000000..a4ae467 --- /dev/null +++ b/minichat-2-3b.q3_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930ca03c3928f410ee14007ded7249b421b3c404a1dce58364fd48d1125fa227 +size 1507578464 diff --git a/minichat-2-3b.q4_k_m.gguf b/minichat-2-3b.q4_k_m.gguf new file mode 100644 index 0000000..31b11eb --- /dev/null +++ b/minichat-2-3b.q4_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa5269f9ba067987edb8d22d22dfaa8e48b6d2382c780605dd46903b0004127 +size 1846655072 diff --git a/minichat-2-3b.q5_k_m.gguf b/minichat-2-3b.q5_k_m.gguf new file mode 100644 index 0000000..b42a932 --- /dev/null +++ b/minichat-2-3b.q5_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50e3febc1eac58c7d0700f2d130760004eaa0cac9074252df46b253d2d1cef07 +size 2153388128 diff --git a/minichat-2-3b.q6_k.gguf b/minichat-2-3b.q6_k.gguf new file mode 100644 index 0000000..105a863 --- /dev/null +++ b/minichat-2-3b.q6_k.gguf @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:088b2a5b670155e773c4fd5c9804d57bcd2fd9b3ebc907576191e1118bc73db3 +size 2479292000 diff --git a/minichat-2-3b.q8_0.gguf b/minichat-2-3b.q8_0.gguf new file mode 100644 index 0000000..cf71623 --- /dev/null +++ b/minichat-2-3b.q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f060ea1cb3492ada3f9d540f10f72d07da355cd43bd4e61c68164bc7010335 +size 3210768992