From 538225ecaac5c2636ee6e3f74dc825d2e9b2b9f7 Mon Sep 17 00:00:00 2001
From: ModelHub XC
Date: Fri, 29 May 2026 22:21:57 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
=?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
=?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Model: openbmb/BitCPM-CANN-3B
Source: Original Platform
---
.gitattributes | 35 +
README.md | 161 +
config.json | 31 +
generation_config.json | 8 +
pytorch_model.bin | 3 +
special_tokens_map.json | 81 +
tokenizer.json | 177952 +++++++++++++++++++++++++++++++++++++
tokenizer.model | 3 +
tokenizer_config.json | 116 +
9 files changed, 178390 insertions(+)
create mode 100644 .gitattributes
create mode 100644 README.md
create mode 100644 config.json
create mode 100644 generation_config.json
create mode 100644 pytorch_model.bin
create mode 100644 special_tokens_map.json
create mode 100644 tokenizer.json
create mode 100644 tokenizer.model
create mode 100644 tokenizer_config.json
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a6344aa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c8e958d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,161 @@
+---
+license: apache-2.0
+language:
+- zh
+- en
+pipeline_tag: text-generation
+library_name: transformers
+---
+
+

+
+
+
+GitHub Repo |
+Technical Report
+
+
+👋 Join us on Discord and WeChat
+
+
+## Introduction
+
+BitCPM-CANN is the first end-to-end 1.58-bit (ternary) large language model training system natively built on Huawei Ascend NPU. The system integrates quantization-aware training (QAT) into the Megatron-LM framework with MindSpeed acceleration, covering the full training stack from custom ternary operators to distributed parallel training on Ascend 910B.
+
+We train a family of four models—BitCPM-CANN-0.5B/1B/3B/8B—and evaluate them against their full-precision MiniCPM4 counterparts across 11 benchmarks. The 1B/3B/8B models retain **95.7%–97.2%** of full-precision performance, while enabling approximately **6× memory reduction** at inference time. QAT introduces only **5% training throughput overhead** (148 vs. 155 TFLOP/s per NPU).
+
+### Key Features
+
+- 🔬 **1.58-Bit Ternary Quantization**: Compresses model weights to ternary values {-1, 0, 1}, achieving ~90% bit-width reduction compared to BF16.
+- 🖥️ **Native Ascend NPU Training**: First publicly reported 1.58-bit training effort on domestic NPU platform at 8B scale, establishing reusable low-bit training infrastructure for the Ascend ecosystem.
+- ⚡ **Minimal Training Overhead**: Only 5% throughput degradation compared to full-precision training on Ascend 910B.
+- 📦 **~6× Inference Memory Reduction**: Enables longer contexts, more serving replicas, and edge deployment on consumer devices.
+
+### Important Note
+
+> The models in this repository are in **pseudo-quantized (fake quantization) format**. This means the weights are stored in standard floating-point format with ternary values already applied during training. You can load and run inference with these models **exactly the same way as full-precision models**—no special quantization libraries or custom kernels are required.
+
+## BitCPM-CANN Model Family
+
+| Model | HuggingFace | GGUF |
+|-------|-------------|------|
+| BitCPM-CANN-0.5B | [openbmb/BitCPM-CANN-0.5B](https://huggingface.co/openbmb/BitCPM-CANN-0.5B) | [openbmb/BitCPM-CANN-0.5B-gguf](https://huggingface.co/openbmb/BitCPM-CANN-0.5B-gguf) |
+| BitCPM-CANN-1B | [openbmb/BitCPM-CANN-1B](https://huggingface.co/openbmb/BitCPM-CANN-1B) | [openbmb/BitCPM-CANN-1B-gguf](https://huggingface.co/openbmb/BitCPM-CANN-1B-gguf) |
+| BitCPM-CANN-3B | [openbmb/BitCPM-CANN-3B](https://huggingface.co/openbmb/BitCPM-CANN-3B) | [openbmb/BitCPM-CANN-3B-gguf](https://huggingface.co/openbmb/BitCPM-CANN-3B-gguf) |
+| BitCPM-CANN-8B | [openbmb/BitCPM-CANN-8B](https://huggingface.co/openbmb/BitCPM-CANN-8B) | [openbmb/BitCPM-CANN-8B-gguf](https://huggingface.co/openbmb/BitCPM-CANN-8B-gguf) |
+
+## Usage
+
+### Inference with Transformers
+
+Since BitCPM-CANN models are in pseudo-quantized format, you can use them exactly like standard full-precision models:
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+torch.manual_seed(0)
+
+path = 'openbmb/BitCPM-CANN-3B'
+device = "cuda"
+tokenizer = AutoTokenizer.from_pretrained(path)
+model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)
+
+# User can directly use the chat interface
+responds, history = model.chat(tokenizer, "Write an article about Artificial Intelligence.", temperature=0.7, top_p=0.7)
+print(responds)
+
+# User can also use the generate interface
+# messages = [
+# {"role": "user", "content": "Write an article about Artificial Intelligence."},
+# ]
+# prompt_text = tokenizer.apply_chat_template(
+# messages,
+# tokenize=False,
+# add_generation_prompt=True,
+# )
+# model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)
+
+# model_outputs = model.generate(
+# **model_inputs,
+# max_new_tokens=1024,
+# top_p=0.7,
+# temperature=0.7
+# )
+# output_token_ids = [
+# model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs['input_ids']))
+# ]
+
+# responses = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]
+# print(responses)
+```
+
+## Evaluation Results
+
+### Main Results
+
+BitCPM-CANN models are evaluated against their full-precision MiniCPM4 counterparts across 11 benchmarks spanning commonsense reasoning, domain knowledge, and mathematics & reasoning.
+
+| Task | 8B FP | 8B Ternary | 3B FP | 3B Ternary | 1B FP | 1B Ternary | 0.5B FP | 0.5B Ternary |
+|------|-------|------------|-------|------------|-------|------------|---------|--------------|
+| ARC-c | 87.46 | 86.10 | 80.34 | 78.98 | 64.41 | 67.12 | 51.86 | 50.51 |
+| ARC-e | 95.06 | 93.47 | 92.77 | 88.36 | 79.89 | 79.01 | 71.78 | 65.08 |
+| BoolQ | 84.89 | 83.39 | 79.85 | 77.89 | 68.38 | 65.50 | 62.29 | 43.55 |
+| PIQA | 80.52 | 78.78 | 70.57 | 72.69 | 66.16 | 65.45 | 60.99 | 58.49 |
+| WinoGrande | 63.30 | 61.17 | 58.41 | 52.96 | 51.62 | 53.28 | 51.07 | 51.54 |
+| CMMLU | 80.62 | 78.92 | 78.11 | 76.53 | 74.57 | 67.42 | 65.22 | 60.49 |
+| C-Eval | 81.36 | 77.50 | 75.85 | 75.89 | 73.25 | 65.96 | 66.11 | 60.74 |
+| MMLU | 75.83 | 70.65 | 66.95 | 64.41 | 57.71 | 57.71 | 55.55 | 50.73 |
+| MMLU-Redux | 77.14 | 69.85 | 65.82 | 60.07 | 54.80 | 54.16 | 48.00 | 43.79 |
+| BBH | 76.72 | 70.70 | 68.29 | 68.30 | 64.40 | 60.40 | 49.87 | 47.44 |
+| GSM8K | 91.51 | 85.75 | 81.64 | 79.45 | 63.15 | 61.56 | 52.08 | 39.42 |
+| **Average (11 tasks)** | **81.31** | **77.84** | **74.42** | **72.32** | **65.30** | **63.42** | **57.71** | **51.98** |
+| **Retention** | | **95.7%** | | **97.2%** | | **97.1%** | | **90.1%** |
+
+### Key Observations
+
+- **1B and above achieve ≥95.7% retention**: The 3B model achieves the highest retention at 97.2%, demonstrating that ternary QAT at this scale introduces minimal capability loss.
+- **0.5B reveals scale-dependent sensitivity**: The smallest model retains 90.1%, indicating that quantization perturbation is more damaging when model capacity is limited.
+- **1:1 alignment with MiniCPM4**: The matched evaluation enables direct substitution decisions—deployments can replace specific full-precision models with their ternary counterparts with clearly quantified trade-offs.
+
+### Training Efficiency
+
+| Configuration | TFLOP/s per NPU | Overhead |
+|---------------|-----------------|----------|
+| Full-precision | 155 | — |
+| Ternary QAT | 148 | 4.5% |
+
+System-level throughput on 2-node 16-card Ascend 910C:
+- 3B model: ~2700 tokens/s per card
+- 8B model: ~1340 tokens/s per card
+
+## Technical Approach
+
+BitCPM-CANN uses a ternary quantizer that maps each weight group to {-1, 0, 1} scaled by a group-wise factor, trained with Straight-Through Estimator (STE) for gradient flow. The training follows a two-stage strategy: **complete QAT followed by post-training distillation**, which avoids amplifying training instability during early training.
+
+The system is built as a four-layer vertical stack on Ascend NPU:
+1. **QAT Training Logic**: Ternary quantizer with STE, pluggable quantization layers in Megatron-LM.
+2. **Megatron-LM Quantized Model Layer**: Tensor-parallel linear layers with integrated weight/activation quantizers.
+3. **Framework Entry Layer**: `torch_npu` and `mindspeed.megatron_adaptor` injection for NPU execution.
+4. **Ascend Software-Hardware Stack**: MindSpeed, CANN, HCCL communication, Ascend 910B NPU hardware.
+
+For full technical details, please refer to our [Technical Report](https://github.com/OpenBMB/MiniCPM/blob/main/docs/BitCPM_CANN.pdf).
+
+## Statement
+- As a language model, BitCPM-CANN generates content by learning from a vast amount of text.
+- However, it does not possess the ability to comprehend or express personal opinions or value judgments.
+- Any content generated by BitCPM-CANN does not represent the viewpoints or positions of the model developers.
+- Therefore, when using content generated by BitCPM-CANN, users should take full responsibility for evaluating and verifying it on their own.
+
+## LICENSE
+- This repository and BitCPM-CANN models are released under the [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE) License.
+
+## Citation
+- Please cite our technical report if you find our work valuable.
+
+```bibtex
+@article{bitcpmcann,
+ title={{BitCPM-CANN}: Native 1.58-Bit Large Language Model Training on Ascend NPU},
+ author={BitCPM Team},
+ year={2026}
+}
+```
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..890f88c
--- /dev/null
+++ b/config.json
@@ -0,0 +1,31 @@
+{
+ "_name_or_path": "openbmb/CPM-2B",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "bos_token_id": 1,
+ "eos_token_id": [2,73440],
+ "pad_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.1,
+ "intermediate_size": 10240,
+ "head_dim": 128,
+ "max_position_embeddings": 32768,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "factor": 1.0,
+ "rope_type": "longrope",
+ "long_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
+ "short_factor": [0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529],
+ "original_max_position_embeddings": 32768
+ },
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.36.0",
+ "use_cache": true,
+ "vocab_size": 73448
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..1d3bb1e
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,8 @@
+{
+ "do_sample": true,
+ "top_p": 0.8,
+ "temperature": 0.8,
+ "bos_token_id": 1,
+ "eos_token_id": [2,73440],
+ "pad_token_id": 2
+}
diff --git a/pytorch_model.bin b/pytorch_model.bin
new file mode 100644
index 0000000..fdf83e0
--- /dev/null
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb8522bfbf14d1408899f9e0f186ea55b84e89ee4156b6a61ad06faf0c6285e
+size 7211764418
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..8619dda
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,81 @@
+{
+ "additional_special_tokens": [
+ {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|tool_call|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|execute_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|execute_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..3df33d6
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,177952 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73440,
+ "content": "<|im_end|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73441,
+ "content": "<|im_start|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73442,
+ "content": "<|tool_call|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73443,
+ "content": "<|execute_start|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73444,
+ "content": "<|execute_end|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73445,
+ "content": "<|fim_prefix|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73446,
+ "content": "<|fim_middle|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73447,
+ "content": "<|fim_suffix|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "ignore_merges": false,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "\n": 5,
+ "\t": 6,
+ "
": 7,
+ "
": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "
": 12,
+ "": 13,
+ " | | ": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 21,
+ "": 22,
+ "
": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "
": 33,
+ "
": 34,
+ "
": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "
": 39,
+ "": 40,
+ "": 41,
+ "
": 42,
+ "": 43,
+ "
": 44,
+ "
": 45,
+ "": 46,
+ "": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "0": 52,
+ "1": 53,
+ "2": 54,
+ "3": 55,
+ "4": 56,
+ "5": 57,
+ "6": 58,
+ "7": 59,
+ "8": 60,
+ "9": 61,
+ "+": 62,
+ "-": 63,
+ "=": 64,
+ ",": 65,
+ "。": 66,
+ "!": 67,
+ "?": 68,
+ "、": 69,
+ ":": 70,
+ "¥": 71,
+ ".": 72,
+ "!": 73,
+ "?": 74,
+ "...": 75,
+ "。。。": 76,
+ "。。。。。。": 77,
+ "《": 78,
+ "》": 79,
+ "【": 80,
+ "】": 81,
+ "『": 82,
+ "』": 83,
+ "```": 84,
+ "": 86,
+ "---": 87,
+ "": 88,
+ ";": 89,
+ ".": 90,
+ "=": 91,
+ "<": 92,
+ ">": 93,
+ "-": 94,
+ "+": 95,
+ "%": 96,
+ "‼": 97,
+ "㊣": 98,
+ "/": 99,
+ "|": 100,
+ "": 101,
+ "": 102,
+ "": 103,
+ "": 104,
+ "": 105,
+ "": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "": 249,
+ "": 250,
+ "": 251,
+ "": 252,
+ "": 253,
+ "": 254,
+ "": 255,
+ "": 256,
+ "": 257,
+ "": 258,
+ "": 259,
+ "": 260,
+ "": 261,
+ "": 262,
+ "": 263,
+ "": 264,
+ "": 265,
+ "": 266,
+ "": 267,
+ "": 268,
+ "": 269,
+ "": 270,
+ "": 271,
+ "": 272,
+ "": 273,
+ "": 274,
+ "": 275,
+ "": 276,
+ "": 277,
+ "": 278,
+ "": 279,
+ "": 280,
+ "": 281,
+ "": 282,
+ "": 283,
+ "": 284,
+ "": 285,
+ "": 286,
+ "": 287,
+ "": 288,
+ "": 289,
+ "": 290,
+ "": 291,
+ "": 292,
+ "": 293,
+ "": 294,
+ "": 295,
+ "": 296,
+ "": 297,
+ "": 298,
+ "": 299,
+ "": 300,
+ "": 301,
+ "": 302,
+ "": 303,
+ "": 304,
+ "": 305,
+ "": 306,
+ "": 307,
+ "": 308,
+ "": 309,
+ "": 310,
+ "": 311,
+ "": 312,
+ "": 313,
+ "": 314,
+ "": 315,
+ "": 316,
+ "": 317,
+ "": 318,
+ "": 319,
+ "": 320,
+ "": 321,
+ "": 322,
+ "": 323,
+ "": 324,
+ "": 325,
+ "": 326,
+ "": 327,
+ "": 328,
+ "": 329,
+ "": 330,
+ "": 331,
+ "": 332,
+ "": 333,
+ "": 334,
+ "": 335,
+ "": 336,
+ "": 337,
+ "": 338,
+ "": 339,
+ "": 340,
+ "": 341,
+ "": 342,
+ "": 343,
+ "": 344,
+ "": 345,
+ "": 346,
+ "": 347,
+ "": 348,
+ "": 349,
+ "": 350,
+ "": 351,
+ "": 352,
+ "": 353,
+ "