From 4409676d79a51ecbf80ac63d9cdd639c2b68709a Mon Sep 17 00:00:00 2001
From: ModelHub XC <noreply@modelhub.org.cn>
Date: Tue, 16 Jun 2026 04:26:16 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
 =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: Entrit/Llama-3.1-8B-trit-uniform-d4
Source: Original Platform
---
 .gitattributes         | 36 +++++++++++++++++++++
 .mirrored              |  1 +
 README.md              | 73 ++++++++++++++++++++++++++++++++++++++++++
 config.json            | 36 +++++++++++++++++++++
 generation_config.json |  9 ++++++
 model.safetensors      |  3 ++
 tokenizer.json         |  3 ++
 tokenizer_config.json  | 14 ++++++++
 8 files changed, 175 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 .mirrored
 create mode 100644 README.md
 create mode 100644 config.json
 create mode 100644 generation_config.json
 create mode 100644 model.safetensors
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer_config.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/.mirrored b/.mirrored
new file mode 100644
index 0000000..0baba08
--- /dev/null
+++ b/.mirrored
@@ -0,0 +1 @@
+2026-04-15T20:25:20.900418
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f313f18
--- /dev/null
+++ b/README.md
@@ -0,0 +1,73 @@
+---
+license: llama3.1
+license_name: llama3.1
+license_link: https://huggingface.co/meta-llama/Llama-3.1-8B/blob/main/LICENSE
+base_model: meta-llama/Llama-3.1-8B
+tags:
+  - quantization
+  - ternary
+  - balanced-ternary
+  - tritllm
+  - llama
+  - llama-3.1
+library_name: transformers
+extra_gated_description: This model is a quantized derivative of Meta Llama 3.1. By accessing this model you agree to the Llama 3.1 Community License and the Meta Acceptable Use Policy.
+---
+
+# Llama-3.1-8B-trit-uniform-d4
+
+**Built with Llama.** Balanced ternary quantization of [`meta-llama/Llama-3.1-8B`](https://huggingface.co/meta-llama/Llama-3.1-8B) at depth **d=4** (81 levels per weight, **6.64 bits per weight**). Distributed under the [Llama 3.1 Community License Agreement](https://huggingface.co/meta-llama/Llama-3.1-8B/blob/main/LICENSE) and subject to Meta's [Acceptable Use Policy](https://www.llama.com/llama3_1/use-policy).
+
+Produced with the codec from **"Balanced Ternary Post-Training Quantization for Large Language Models"** (Stentzel, 2026). See [Entrit/tritllm-codec](https://huggingface.co/Entrit/tritllm-codec) for the codec source.
+
+## Quick load
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained("Entrit/Llama-3.1-8B-trit-uniform-d4")
+tokenizer = AutoTokenizer.from_pretrained("Entrit/Llama-3.1-8B-trit-uniform-d4")
+```
+
+The weights are dequantized to FP16 for stock-`transformers` compatibility. The on-disk size is therefore the same as the FP16 source. The 6.64-bpw figure refers to the *information content* of the quantized matrices and is what matters for inference on hardware that consumes the packed trit format directly (see [Entrit/tritllm-kernel](https://huggingface.co/Entrit/tritllm-kernel)).
+
+## Quantization details
+
+| Field | Value |
+|---|---|
+| Source model | [`meta-llama/Llama-3.1-8B`](https://huggingface.co/meta-llama/Llama-3.1-8B) |
+| Depth | d=4 (81 levels) |
+| Bits per weight | 6.64 (≈6.34 for four trits per weight + ≈0.30 for one 27-entry scale index per 16-weight group) |
+| Group size | 16 |
+| Scale codebook | 27-entry log-spaced (scale_depth=3) |
+| Method | Uniform PTQ |
+| Quantized layers | all 2D linear matrices except those listed under "Kept FP16" |
+| Kept FP16 | `lm_head`, token embeddings, all `*_norm` layers |
+| Codec | tritllm v2 |
+
+## License and use
+
+This is a research artifact. The underlying weights remain governed by the Llama 3.1 Community License Agreement; commercial use is permitted only under the terms of that license. By using this model you agree to:
+
+1. Comply with the [Llama 3.1 Community License](https://huggingface.co/meta-llama/Llama-3.1-8B/blob/main/LICENSE).
+2. Comply with Meta's [Acceptable Use Policy](https://www.llama.com/llama3_1/use-policy).
+3. Display "Built with Llama" attribution if you redistribute or publicly demo derivatives of this model.
+
+## Citation
+
+```bibtex
+@article{stentzel2026ternaryptq,
+  title  = {Balanced Ternary Post-Training Quantization for Large Language Models},
+  author = {Stentzel, Eric},
+  year   = 2026,
+  note   = {Entrit Systems}
+}
+```
+
+## Reproducibility
+
+```bash
+git clone https://huggingface.co/Entrit/tritllm-codec
+cd tritllm-codec
+python quantize_model_v2.py --model meta-llama/Llama-3.1-8B --configs uniform-d4 --out ./out
+```
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..9ef9292
--- /dev/null
+++ b/config.json
@@ -0,0 +1,36 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "dtype": "float16",
+  "eos_token_id": 128001,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": null,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_parameters": {
+    "factor": 8.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_theta": 500000.0,
+    "rope_type": "llama3"
+  },
+  "tie_word_embeddings": false,
+  "transformers_version": "5.5.3",
+  "use_cache": true,
+  "vocab_size": 128256
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..6d8151b
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": 128001,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "5.5.3"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..fb33b54
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb3e5ba920ffa74d10153226cb572a09305b0d7e91d45db41bb2b1f8bf25c13
+size 16060556328
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..1c1d8d5
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
+size 17209920
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..35f0a89
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,14 @@
+{
+  "backend": "tokenizers",
+  "bos_token": "<|begin_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "is_local": true,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 131072,
+  "pad_token": "<|end_of_text|>",
+  "tokenizer_class": "TokenizersBackend"
+}