commit 9a6beb0883e93f0820a9945634a9506e98ce3215 Author: ModelHub XC Date: Tue Jun 2 02:30:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: KOREAson/KO-REAson-AX3_1-7B-0831 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6e7f6d9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,57 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs 
merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00005-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00002-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00003-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00006-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..41612cf --- /dev/null +++ b/README.md @@ -0,0 +1,242 @@ +--- +library_name: transformers +tags: [] +--- + +# KO-REAson + +**KO-REAson** is a series of Korean-centric reasoning language models developed in collaboration with [OneLineAI](https://onelineai.com/), [KISTI-KONI](https://huggingface.co/KISTI-KONI), [HAE-RAE](https://huggingface.co/HAERAE-HUB) and ORACLE. + +We use the **Language-Mixed Chain-of-Thought (CoT)** approach, which allows the model to alternate between English and Korean during the “Think” stage of reasoning, preserving key Korean terms while leveraging English for logical scaffolding. 
+ +Top-performing models of our series [KO-REAson-AX3_1-7B-0831 (KONI-7B-R-20250831)](https://huggingface.co/KISTI-KONI/KONI-7B-R-20250831) and [KO-REAson-7B-Q2_5-0831](https://huggingface.co/KoReason/KO-REASon-7B-Q2_5-0831) show performance comparable to models trained on closed-source datasets such as Exaone-Deep-7.8B. + + +

+ Model Comparison +
+ + Left: Average performance (Held-out-Ko) of open models trained on closed or open data; + our models are highlighted in green. + +

+ +## Model Details + +The **KO-REAson-0831** family comes in six variants based on the base model used. + +| Model (link) | Base | Notes | +| -------------------------------------------------------------------------------------------- | -------------------- | --------------------------- | +| [KO-REAson-L3_1-8B-0831](https://huggingface.co/KoReason/KO-REASon-L3_1-8B-0831) | [Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | `L3_1` → Llama-3.1-8B | +| [KO-REAson-KL3_1-8B-0831](https://huggingface.co/KOREAson/KO-REAson-KL3_1-8B-0831) | [Koni-Llama-3.1-8B](https://huggingface.co/KISTI-KONI/KONI-Llama3.1-8B-Instruct-20241024) | `KL3_1` → Koni-Llama-3.1-8B; also called [KONI-Llama3.1-8B-R-20250831](https://huggingface.co/KISTI-KONI/KONI-Llama3.1-8B-R-20250831) | +| [KO-REAson-G3-4B-0831](https://huggingface.co/KoReason/KO-REASon-G3-4B-0831) | [Gemma-3 4B](https://huggingface.co/google/gemma-3-4b-it) | `G3` → Gemma-3-4B | +| [KO-REAson-AX3_1-7B-0831](https://huggingface.co/KOREAson/KO-REAson-7B-AX3_1-0831) | [A.X.-3.1-Light (≈7B)](https://huggingface.co/skt/A.X-3.1-Light) | `AX3_1` → A.X.-3.1-Light; also called [KONI-7B-R-20250831](https://huggingface.co/KISTI-KONI/KONI-7B-R-20250831) | +| [KO-REAson-K2505_8B-0831](https://huggingface.co/KoReason/KO-REASon-K2505_8B-0831) | [Kanana-2505 (8B)](https://huggingface.co/kakaocorp/kanana-1.5-8b-instruct-2505) | `K2505` → Kanana-2505 | +| [KO-REAson-7B-Q2_5-0831](https://huggingface.co/KoReason/KO-REASon-7B-Q2_5-0831) | [Qwen-2.5 (7B)](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | `Q2_5` → Qwen-2.5 | + + + +# Performance + +**Evaluation Datasets** + +The model's performance was evaluated across a total of 11 benchmarks, and the evaluation suite is divided into two parts: (You can check these benchmarks in [HAERAE-HUB/KoSimpleEval](https://huggingface.co/datasets/HAERAE-HUB/KoSimpleEval)) + +- **Held-in**: This set of benchmarks is used for routine monitoring of the model's performance during the 
training and ablation study phases. +- **Held-out**: This set is used only once to evaluate the final model after all training and ablations are complete. + +This separation is designed to prevent inadvertent overfitting to the benchmarks during the iterative training process and to provide a more accurate measure of the model's generalization capabilities. + +|**Category**|**Held-in**|**Held-out**| +|---|---|---| +|**General Knowledge**|KMMLU-Redux|KMMLU-HARD, KMMLU-Pro| +|**Reasoning**|MCLM|KSM, GPQA, AIME2024, AIME2025| +|**Korean-specific**|HAE-RAE Bench|CLIcK, KoBALT-700| + + +**Comparison with models trained on public datasets** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Models# InstancesMethodologyHeld-Out (Ko)Held-Out (En)Total
KO-REAson-AX3_1-7B-0831(KONI-7B-R-20250831; Ours)260kSFT44.641.243.3
KO-REAson-7B-Q2_5-0831(Ours)260kSFT45.1038.7542.72
KO-REAson-KL3_1-8B-0831(KONI-Llama3.1-8B-R-20250831)260kSFT40.1330.5736.54
Open Recipe (En)
OpenThinker3-7B1.2MSFT33.655.541.8
s1.1-7B1kSFT35.623.431.1
Llama-3.1-Nemotron-Nano-8B-v1>3MSFT & RL27.044.133.4
Open Recipe (Ko)
Ko-R1-14B45kSFT43.746.344.7
Ko-R1-7B45kSFT27.336.130.6
LLaMa-3.1-Ko-Reasoning-8B63kSFT17.77.714.0
+ +**Held-out benchmark performance** + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelModel SizeGeneralReasoningKorean-SpecificAverage
(Held-out)
Average
(Held-out-Ko)
KMMLU-HARDKMMLU-ProKSMAIME 2024AIME 2025GPQACLIcKKoBALT-700
Llama-3.1-Nemotron-Nano-8B8.0321.4722.8947.0656.6743.3332.3234.549.2933.4527.05
Llama-3.1-Korean-Reasoning-8B-Instruct8.0314.9121.726.090.000.0023.2339.656.1413.9717.70
EXAONE-Deep-7.8B7.8240.9637.3570.8070.0063.3364.6554.2418.8652.5244.44
DeepSeek-R1-Distill-Qwen-7B7.620.0023.0056.0960.0040.0043.430.008.2928.8517.48
DeepSeek-R1-Distill-Llama-8B8.0323.2226.2629.9733.3320.0046.4639.0513.2928.9526.36
s1.1-7B7.6231.1637.7030.6016.6723.3330.3056.8421.8631.0635.63
OpenThinker3-7B7.6230.3126.2663.5966.6753.3346.4647.6910.1435.6330.60
Ko-R1-7B7.6128.4619.3151.6146.6733.3328.2832.484.7130.6127.31
KO-REAson-KL3_1-8B-0831(KONI-Llama3.1-8B-R-20250831)8.0344.6440.0837.9623.3330.0038.3856.3921.5736.5440.13
KO-REAson-AX3_1-7B-0831 (KONI-7B-R-20250831)7.2645.5738.1352.8053.3333.3336.8762.8623.4343.2944.56
KO-REAson-7B-Q2_5-08317.2646.8144.9348.1143.3330.0042.9360.6525.0042.7245.10
+ + +## Citation +``` +The paper will be released soon! +``` + + +## Contact + +For any questions contact us via the following email :) + +``` +spthsrbwls123@yonsei.ac.kr +``` + + +## Acknowledgments +This research was supported by the Korea Institute of Science and Technology Information (KISTI) (No.(KISTI) K25L1M1C1), aimed at developing KONI (KISTI Open Neural Intelligence), a large language model specialized in science and technology. \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..3eb47e7 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,72 @@ +{%- if tools is iterable and tools | length > 0 %} + {{- '<|im_start|><|system|>'}} + {{- '당신은 도구 호출 기능을 갖춘 유용한 도우미입니다. 사용자의 요청을 처리하기 위해서 필요한 도구가 주어진 목록에 있는 경우 도구 호출로 응답하세요. +필요한 도구가 목록에 없는 경우에는 도구 호출 없이 사용자가 요구한 정보를 제공하세요. +필요한 도구가 목록에 있지만 해당 도구를 호출하는데 필요한 argument 정보가 부족한 경우 해당 정보를 사용자에게 요청하세요. +사용자의 요청을 처리하기 위해 여러번 도구를 호출할 수 있어야 합니다. +도구 호출 이후 도구 실행 결과를 입력으로 받으면 해당 결과를 활용하여 답변을 생성하세요. + +다음은 접근할 수 있는 도구들의 목록 입니다: + +'}} + {%- for t in tools %} + {{- t | tojson }} + {{- ' +' }} + {%- endfor %} + {{- '' }} + {{- ' + +도구를 호출하려면 아래의 JSON으로 응답하세요. 
+도구 호출 형식: {"name": 도구 이름, "arguments": dictionary 형태의 도구 인자값}' }} + {{- '<|im_end|>' }} + {%- endif %} + + {%- for message in messages %} + {%- if message.role == 'system' %} + {{- '<|im_start|><|system|>' + message.content + '<|im_end|>'}} + {%- elif message.role == 'user' %} + {{- '<|im_start|><|user|>' + message.content + '<|im_end|>'}} + {%- elif message.role == 'assistant' %} + {{- '<|im_start|><|assistant|>'}} + {%- set content = '' %} + {%- if message.content is defined %} + {%- set content = message.content %} + {%- endif %} + + {%- if add_generation_prompt and not (message.reasoning_content is defined and message.reasoning_content is not none) %} + {%- if '' in message.content %} + {%- set content = message.content.split(''.strip())[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + + {{- content}} + {%- if message.tool_calls is defined %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '' }} + {{- '{' }} + {{- '"name": "' }} + {{- tool_call.name }} + {{- '"' }} + {%- if tool_call.arguments is defined %} + {{- ', ' }} + {{- '"arguments": ' }} + {{- tool_call.arguments|tojson }} + {%- endif %} + {{- '}' }} + {{- '' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>'}} + + {%- elif message.role == 'tool' %} + {{- '<|im_start|><|extra_id_13|>' + message.content + '<|im_end|>'}} + {%- endif %} + {%- endfor %} + + {%- if add_generation_prompt %} + {{- '<|im_start|><|assistant|>' }} + {%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..209fc7e --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "bos_token_id": 0, + "eos_token_id": 0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 10880, + "max_position_embeddings": 32768, 
+ "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.55.4", + "use_cache": false, + "vocab_size": 102400 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9759d76 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "bos_token_id": 0, + "eos_token_id": 27, + "max_new_tokens": 32768, + "pad_token_id": 1, + "transformers_version": "4.55.4" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..2d20879 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33bd322a424d0144e5bd123dbeef33f2538ac57adabf639984599ee9b9ca9b3 +size 1538148 diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..f361287 --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983e2d0b15b189c99e93f4ed296ba57408cefac3cfa81ad4157c7e01872efc1e +size 4957802784 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..b28d381 --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b2117cada0e6a62c9d930558614a13113685b0b673bf65baf7f4a9c6ecc1de +size 4953676072 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..8d57afe --- /dev/null +++ 
b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec378fdedc1b03b36a9d47d78511a1e1a43fdf7018ab9aa8a2bb707be02c9bfc +size 4886567144 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..7dcb0f2 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771b22ae7c556a9b452eda7498d45efbe78eca8f7cb3340792ab0176a223b624 +size 4997716200 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..ff61098 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89d46643533ec0a14e3d2f122b29b67dca68425e4a825f26fa1aeffb20ab0a6 +size 4997716192 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..90b37f5 --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562a20eb61c449b8208ebdef4c5637f6e267143e800e794bf69d5ab9314c2ccf +size 4265758312 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..24df2bd --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,299 @@ +{ + "metadata": { + "total_parameters": 7264800768, + "total_size": 29059203072 + }, + "weight_map": { + "lm_head.weight": "model-00006-of-00006.safetensors", + "model.embed_tokens.weight": "model-00001-of-00006.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + 
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + 
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.input_layernorm.weight": 
"model-00003-of-00006.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + 
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.post_attention_layernorm.weight": 
"model-00004-of-00006.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + 
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.23.self_attn.v_proj.weight": 
"model-00005-of-00006.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + 
"model.layers.26.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.mlp.up_proj.weight": 
"model-00006-of-00006.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors", + 
"model.layers.31.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + 
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + 
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.norm.weight": "model-00006-of-00006.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..0082fb1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,88 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "<|pad|>", + "<|unk|>", + "<|sep|>", + "<|mask|>", + "<|cls|>", + "<|image|>", + "<|audio|>", + "<|user|>", + "<|system|>", + "<|assistant|>", + "<|extra_id_0|>", + "<|extra_id_1|>", + "<|extra_id_2|>", + "<|extra_id_3|>", + "<|extra_id_4|>", + "<|extra_id_5|>", + "<|extra_id_6|>", + "<|extra_id_7|>", + "<|extra_id_8|>", + "<|extra_id_9|>", + "<|extra_id_10|>", + "<|extra_id_13|>", + "<|im_start|>", + "<|im_sep|>", + "<|im_end|>", + "<|resident_reg|>", + "<|foreigner_reg|>", + "<|business_reg|>", + "<|credit_card|>", + "<|passport|>", + "<|driver_license|>", + "<|telephone|>", + "<|health_insurance|>", + "<|bank_account|>" + ], + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + "cls_token": { + "content": "<|cls|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "<|mask|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "<|sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "<|unk|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..5bb9f8f --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3973eceeed921e72d54fbbf597e1bea0a219112a0809ad8eaf62a1754f8ea44 +size 8495125 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..26e1c1a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,386 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|unk|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|mask|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|cls|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|extra_id_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|extra_id_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|extra_id_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|extra_id_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|extra_id_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|extra_id_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|extra_id_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|extra_id_7|>", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|extra_id_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|extra_id_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|extra_id_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "<|extra_id_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|im_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|resident_reg|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|foreigner_reg|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|business_reg|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|credit_card|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "32": { + "content": "<|passport|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|driver_license|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|telephone|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|health_insurance|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|bank_account|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "<|pad|>", + "<|unk|>", + "<|sep|>", + "<|mask|>", + "<|cls|>", + "<|image|>", + "<|audio|>", + "<|user|>", + "<|system|>", + "<|assistant|>", + "<|extra_id_0|>", + "<|extra_id_1|>", + "<|extra_id_2|>", + "<|extra_id_3|>", + "<|extra_id_4|>", + "<|extra_id_5|>", + "<|extra_id_6|>", + "<|extra_id_7|>", + "<|extra_id_8|>", + "<|extra_id_9|>", + "<|extra_id_10|>", + "<|extra_id_13|>", + "<|im_start|>", + "<|im_sep|>", + "<|im_end|>", + "<|resident_reg|>", + "<|foreigner_reg|>", + "<|business_reg|>", + "<|credit_card|>", + "<|passport|>", + "<|driver_license|>", + "<|telephone|>", + "<|health_insurance|>", + 
"<|bank_account|>" + ], + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "cls_token": "<|cls|>", + "eod_token": "<|endoftext|>", + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "mask_token": "<|mask|>", + "max_length": 7680, + "model_max_length": 32768, + "pad_token": "<|pad|>", + "sep_token": "<|sep|>", + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|unk|>", + "vocab_size": 102400 +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..2124f0a --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe985752b212eccc3e58ac96ae9ffe85fc842d6ff6c182ee3ff7d7bec9bb170c +size 2248633