commit 8bf8990745c7a1eccee271bda436319c627659a9 Author: ModelHub XC Date: Wed Apr 29 08:48:26 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: prism-ml/Ternary-Bonsai-8B-gguf Source: Original Platform diff --git a/.eval_results/gsm8k.yaml b/.eval_results/gsm8k.yaml new file mode 100644 index 0000000..bb1ab82 --- /dev/null +++ b/.eval_results/gsm8k.yaml @@ -0,0 +1,7 @@ +- dataset: + id: openai/gsm8k + task_id: gsm8k + value: 91 + source: + url: https://huggingface.co/prism-ml/Ternary-Bonsai-8B-gguf + name: Model Card \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a750855 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,40 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs
merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +Ternary-Bonsai-8B-F16.gguf filter=lfs diff=lfs merge=lfs -text +Ternary-Bonsai-8B-Q2_0.gguf filter=lfs diff=lfs merge=lfs -text +Ternary-Bonsai-8B-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text +Ternary-Bonsai-8B-TQ2_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..66a27ec --- /dev/null +++ b/LICENSE @@ -0,0 +1,177 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 0000000..bd212b6 --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,4 @@ +This software is copyright 2026-present Prism ML, Inc. It is available under the Apache 2.0 license. +If you publicly deploy or redistribute this software, we would appreciate attribution such as: "Created using Bonsai by Prism ML."
+ +This software is built from Qwen3-8B, Copyright 2024 Alibaba Cloud, which is available under the Apache 2.0 License: https://huggingface.co/Qwen/Qwen3-8B/blob/main/LICENSE \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b4d3a28 --- /dev/null +++ b/README.md @@ -0,0 +1,162 @@ +--- +license: apache-2.0 +library_name: gguf +pipeline_tag: text-generation +tags: +- ternary +- 1.58-bit +- gguf +- llama-cpp +- q2_0 +- on-device +- prismml +- bonsai +base_model: +- prism-ml/Ternary-Bonsai-8B-unpacked +--- + +

+ Bonsai +

+ +

+ Prism ML Website  |  + White Paper  |  + Demo & Examples  |  + Discord +

+ +# Ternary-Bonsai-8B-gguf + +Ternary (1.58-bit) language model in GGUF Q2_0 format for `llama.cpp` + + +

+ Pareto Frontier +

+ +## Resources + +- **[White Paper](https://github.com/PrismML-Eng/Bonsai-demo/blob/main/ternary-bonsai-8b-whitepaper.pdf)** +- **[Demo repo](https://github.com/PrismML-Eng/Bonsai-demo)** — examples for serving, benchmarking, and integrating Bonsai +- **[Discord](https://discord.gg/prismml)** — community support and updates +- **Kernels**: Q2_0 is not yet in mainline `llama.cpp`. Use our fork at [PrismML-Eng/llama.cpp](https://github.com/PrismML-Eng/llama.cpp) (`prism` branch, default) which adds Q2_0 support for CPU (NEON/generic) and Metal. Upstream PR coming soon. + +## Model Overview + +| Item | Specification | +| :--------------- | :----------------------------------------------------------------------- | +| Base model | Qwen3-8B | +| Parameters | 8.19B (~6.95B non-embedding) | +| Architecture | GQA (32 query / 8 KV heads), SwiGLU MLP, RoPE, RMSNorm | +| Layers | 36 Transformer decoder blocks | +| Context length | 65,536 tokens | +| Vocab size | 151,936 | +| Weight format | GGUF Q2_0 g128: {-1, 0, +1} with FP16 group-wise scaling | +| Packed Q2_0 size | **2.03 GiB** (2.18 GB) | +| Ternary coverage | Embeddings, attention projections, MLP projections, LM head | +| License | Apache 2.0 | + +## Quantization Format: GGUF Q2_0 (g128) + +Each weight takes a value from {-1, 0, +1}, with one shared FP16 scale per group of 128 weights: + +``` +w_i = scale_g * t_i, t_i in {-1, 0, +1} +``` + +Q2_0 encodes each weight as a 2-bit code `q in {0, 1, 2, 3}`, dequantized via `w = (q - 1) * scale`. One 128-element block is 34 bytes (2 bytes FP16 scale + 32 bytes of packed 2-bit codes) for an effective **2.125 bits/weight**. The fourth code point (`q = 3`, reconstructing to `+2 * scale`) is reserved for future extensions; for ternary weights it is unused. 
+ +### Memory + +| Format | Size | Reduction | Ratio | +| :---------------- | ----------: | --------: | ---------: | +| FP16 | 16.38 GB | -- | 1.0x | +| **GGUF Q2_0 g128**| **2.03 GiB** (2.18 GB) | **86.7%** | **7.5x** | + +## Files in this repo + +| File | Format | Size | Recommended | +| :------------------------------ | :----- | -----: | :---------- | +| `Ternary-Bonsai-8B-F16.gguf` | FP16 | 16.38 GB | baseline / re-quantization source | +| `Ternary-Bonsai-8B-Q2_0.gguf` | Q2_0 (g128) | 2.03 GiB | **recommended** (lossless for ternary) | + +## Quickstart + +### Build from the Prism fork + +```bash +git clone https://github.com/PrismML-Eng/llama.cpp +cd llama.cpp +cmake -B build -DGGML_METAL=ON # or -DGGML_CUDA=ON, -DGGML_VULKAN=ON +cmake --build build -j +``` + +### `llama.cpp` CLI + +```bash +./build/bin/llama-cli \ + -m Ternary-Bonsai-8B-Q2_0.gguf \ + -p "Explain quantum computing in simple terms." \ + -n 256 +``` + +### `llama.cpp` server + +```bash +./build/bin/llama-server -m Ternary-Bonsai-8B-Q2_0.gguf -c 4096 +``` + + +## Throughput (llama.cpp, Apple M4 Pro 48 GB) + +| Backend | PP512 (tok/s) | TG128 (tok/s) | +| :--------------- | ------------: | ------------: | +| Metal (GPU) | 455 | **76** | +| NEON CPU (10 t) | 146 | **32** | + +Flags: `-ngl 99 -fa 1` for Metal; `-ngl 0 -fa 1 -t 10` for CPU. + +## Benchmarks + +Evaluated with EvalScope v1.4.2 + vLLM 0.15.1 on NVIDIA H100 under identical infrastructure, generation parameters, and scoring. All models are in the 6B-9B parameter range. 
+ +| Model | Size | Avg | MMLU-R | MuSR | GSM8K | HE+ | IFEval | BFCL | +| :------------------------ | ----------: | -------: | -----: | ---: | ----: | ---: | -----: | ---: | +| Qwen 3 8B | 16.38 GB | **79.3** | 83 | 55 | 93 | 82.3 | 81.5 | 81 | +| **Ternary Bonsai 8B** | **2.18 GB** | **75.5** | 72.6 | 56.2 | 91 | 77.4 | 81.8 | 73.9 | +| *1-bit Bonsai 8B (prior)* | *1.15 GB* | *70.5* | 65.7 | 50 | 88 | 73.8 | 79.8 | 65.7 | +| RNJ 8B | 16.63 GB | **73.1** | 75.5 | 50.4 | 93.7 | 84.2 | 73.8 | 61.1 | +| Ministral3 8B | 16.04 GB | **71.0** | 68.9 | 53.8 | 87.9 | 72.6 | 67.4 | 75.4 | +| Olmo 3 7B | 14.60 GB | **70.9** | 72 | 56.1 | 92.5 | 79.3 | 87.1 | 38.4 | + +Ternary Bonsai 8B ranks **2nd** by average score among all compared models despite being roughly 1/8th the size of the 16-bit baselines. + +## Intelligence Density + +``` +density = -ln(1 - score/100) / size_GB +``` + +| Model | Size | Intelligence Density (1/GB) | +| :------------------------ | ----------: | --------------------------: | +| **Ternary Bonsai 8B** | **2.18 GB** | **0.645** | +| *1-bit Bonsai 8B (prior)* | *1.15 GB* | *1.062* | +| Qwen 3 8B | 16.38 GB | 0.096 | +| RNJ 8B | 16.63 GB | 0.079 | + +## Citation + +```bibtex +@techreport{ternarybonsai, + title = {Ternary Bonsai: 1.58-bit Language Models at 8B, 4B, and 1.7B Scale}, + author = {Prism ML}, + year = {2026}, + month = {April}, + url = {https://prismml.com} +} +``` + +## Contact + +For questions, feedback, or collaboration inquiries: **contact@prismml.com** \ No newline at end of file diff --git a/Ternary-Bonsai-8B-F16.gguf b/Ternary-Bonsai-8B-F16.gguf new file mode 100644 index 0000000..8d4e035 --- /dev/null +++ b/Ternary-Bonsai-8B-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6abfaf896c1e36db825112fc0a18e49adea05eeca1c6b2fba4d785ca7e947ff +size 16383663200 diff --git a/Ternary-Bonsai-8B-Q2_0.gguf b/Ternary-Bonsai-8B-Q2_0.gguf new file mode 100644 index 0000000..434453c --- /dev/null +++ b/Ternary-Bonsai-8B-Q2_0.gguf @@ -0,0 +1,3 @@ +version
https://git-lfs.github.com/spec/v1 +oid sha256:3c8d70470a5d97e5a2b9410ddd899cb740116591462626c60cb2fead6448f60b +size 2182184672 diff --git a/assets/bonsai-logo.svg b/assets/bonsai-logo.svg new file mode 100644 index 0000000..6e2df3a --- /dev/null +++ b/assets/bonsai-logo.svg @@ -0,0 +1 @@ + diff --git a/assets/frontier.png b/assets/frontier.png new file mode 100644 index 0000000..6035ebf Binary files /dev/null and b/assets/frontier.png differ diff --git a/assets/frontier.svg b/assets/frontier.svg new file mode 100644 index 0000000..034dfe9 --- /dev/null +++ b/assets/frontier.svg @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + Frontier efficiency + Average score (IFEval, GSM8K, HumanEval+, BFCL, MuSR, MMLU-Redux) + + + + + + + + + + + + + + + + + + + + + 40 + 50 + 60 + 70 + 80 + + + + + + + + + + 0.25 GB + 0.5 GB + 1 GB + 2 GB + 4 GB + 8 GB + 16 GB + + + + + + + + + + + + Model size in GB (log scale) + Average benchmark score + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1-Bit Bonsai 1.7B + 1-Bit Bonsai 4B + 1-Bit Bonsai 8B + + + + + + + + Ternary Bonsai 1.7B + Ternary Bonsai 4B + Ternary Bonsai 8B + + + Qwen3 0.6B + Qwen3 1.7B + Ministral3 3B + Qwen3 4B + Qwen3 8B +