初始化项目,由ModelHub XC社区提供模型
Model: inclusionAI/Ling-Coder-lite-base Source: Original Platform
This commit is contained in:
47
.gitattributes
vendored
Normal file
47
.gitattributes
vendored
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
21
LICENCE
Normal file
21
LICENCE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 inclusionAI
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
122
README.md
Normal file
122
README.md
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
---
|
||||||
|
datasets:
|
||||||
|
- inclusionAI/Ling-Coder-SyntheticQA
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
- zh
|
||||||
|
library_name: transformers
|
||||||
|
license: mit
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
tags:
|
||||||
|
- code
|
||||||
|
- moe
|
||||||
|
---
|
||||||
|
|
||||||
|
# Ling-Coder-lite-base
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="https://huggingface.co/inclusionAI/Ling-lite/resolve/main/ant-bailing.png" width="100"/>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
🤖 <a href="https://modelscope.cn/organization/inclusionAI">ModelScope</a>
|
||||||
|
🤗 <a href="https://huggingface.co/inclusionAI">Hugging Face</a>
|
||||||
|
🖥️ <a href="https://github.com/codefuse-ai/Ling-Coder-Lite">GitHub</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
|
||||||
|
Ling-Coder-Lite is a MoE LLM provided and open-sourced by InclusionAI, which has 16.8B parameters with 2.75B activated parameters. This model demonstrates state-of-the-art performance on 12 coding benchmarks, while simultaneously offering competitive latency and throughput compared to code LLMs of similar size. In addition to open-sourcing the model itself, we also release a substantial amount of code-related data, including synthetic QA, SFT and DPO datasets. More details are described in the technical report [Ling-Coder-TR](https://huggingface.co/papers/2503.17793).
|
||||||
|
|
||||||
|
## Model Downloads
|
||||||
|
|
||||||
|
The following table lists the available models and their parameters; download the one that fits your use case. If you are located in mainland China, we also provide the model on modelscope.cn to speed up the download process.
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|
| **Model** | **#Total Params** | **#Activated Params** | **Context Length** | **Download** |
|
||||||
|
| :----------------: | :---------------: | :-------------------: | :----------------: | :----------: |
|
||||||
|
| Ling-Coder-lite-base | 16.8B | 2.75B | 16K | [🤗 HuggingFace](https://huggingface.co/inclusionAI/Ling-Coder-lite-base) |
|
||||||
|
| Ling-Coder-lite | 16.8B | 2.75B | 16K | [🤗 HuggingFace](https://huggingface.co/inclusionAI/Ling-Coder-lite) |
|
||||||
|
| Ling-Coder-lite-GPTQ-Int8 | 16.8B | 2.75B | 16K | [🤗 HuggingFace](https://huggingface.co/inclusionAI/Ling-Coder-lite-GPTQ-Int8) |
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Dataset Downloads
|
||||||
|
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|
| **Dataset** | **Samples** | **Download** |
|
||||||
|
| :------------: | :----------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||||
|
| Ling-Coder-SyntheticQA | 24M | [🤗 HuggingFace](https://huggingface.co/datasets/inclusionAI/Ling-Coder-SyntheticQA) |
|
||||||
|
| Ling-Coder-SFT | 5M | [🤗 HuggingFace](https://huggingface.co/datasets/inclusionAI/Ling-Coder-SFT) |
|
||||||
|
| Ling-Coder-DPO | 250K | [🤗 HuggingFace](https://huggingface.co/datasets/inclusionAI/Ling-Coder-DPO) |
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
## Evaluation
|
||||||
|
|
||||||
|
Detailed evaluation results are reported in our technical report [Ling-Coder-TR](https://huggingface.co/papers/2503.17793).
|
||||||
|
|
||||||
|
## Quickstart
|
||||||
|
### 🤗 Hugging Face Transformers
|
||||||
|
|
||||||
|
Here is a code snippet to show you how to use the chat model with `transformers`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
|
model_name = "inclusionAI/Ling-Coder-lite"
|
||||||
|
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
torch_dtype="auto",
|
||||||
|
device_map="auto",
|
||||||
|
trust_remote_code=True
|
||||||
|
)
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
trust_remote_code=True
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = "Write a quick sort algorithm in python."
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": prompt}
|
||||||
|
]
|
||||||
|
text = tokenizer.apply_chat_template(
|
||||||
|
messages,
|
||||||
|
tokenize=False,
|
||||||
|
add_generation_prompt=True
|
||||||
|
)
|
||||||
|
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
||||||
|
|
||||||
|
generated_ids = model.generate(
|
||||||
|
**model_inputs,
|
||||||
|
max_new_tokens=512
|
||||||
|
)
|
||||||
|
generated_ids = [
|
||||||
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
||||||
|
]
|
||||||
|
|
||||||
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
For deployment instructions, please refer to the [GitHub repository](https://github.com/codefuse-ai/Ling-Coder-Lite/blob/master/README.md).
|
||||||
|
|
||||||
|
## License
|
||||||
|
This code repository is licensed under [the MIT License](https://huggingface.co/inclusionAI/Ling-Coder-lite/blob/main/LICENCE).
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
|
||||||
|
```
|
||||||
|
@misc{codefuse2025samplemattersleveragingmixtureofexperts,
|
||||||
|
title={Every Sample Matters: Leveraging Mixture-of-Experts and High-Quality Data for Efficient and Accurate Code LLM},
|
||||||
|
author={Codefuse and Ling Team},
|
||||||
|
year={2025},
|
||||||
|
eprint={2503.17793},
|
||||||
|
archivePrefix={arXiv},
|
||||||
|
primaryClass={cs.LG},
|
||||||
|
url={https://arxiv.org/abs/2503.17793},
|
||||||
|
}
|
||||||
|
```
|
||||||
43
config.json
Normal file
43
config.json
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"BailingMoeForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"auto_map": {
|
||||||
|
"AutoConfig": "configuration_bailing_moe.BailingMoeConfig",
|
||||||
|
"AutoModel": "modeling_bailing_moe.BailingMoeModel",
|
||||||
|
"AutoModelForCausalLM": "modeling_bailing_moe.BailingMoeForCausalLM"
|
||||||
|
},
|
||||||
|
"eos_token_id": 126081,
|
||||||
|
"pad_token_id": 126081,
|
||||||
|
"first_k_dense_replace": 0,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 2048,
|
||||||
|
"initializer_range": 0.006,
|
||||||
|
"intermediate_size": 5632,
|
||||||
|
"max_position_embeddings": 4096,
|
||||||
|
"model_type": "bailing_moe",
|
||||||
|
"moe_intermediate_size": 1408,
|
||||||
|
"num_experts": 64,
|
||||||
|
"num_shared_experts": 2,
|
||||||
|
"norm_topk_prob": true,
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_experts_per_tok": 6,
|
||||||
|
"num_hidden_layers": 28,
|
||||||
|
"num_key_value_heads": 4,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 10000,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"transformers_version": "4.36.0",
|
||||||
|
"use_cache": true,
|
||||||
|
"use_bias": false,
|
||||||
|
"use_qkv_bias": false,
|
||||||
|
"vocab_size": 126464,
|
||||||
|
"output_router_logits": false,
|
||||||
|
"embedding_dropout": 0.0,
|
||||||
|
"norm_head": true,
|
||||||
|
"output_dropout": 0.0
|
||||||
|
}
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"framework":"Pytorch","task":"text-generation"}
|
||||||
78
configuration_bailing_moe.py
Normal file
78
configuration_bailing_moe.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
""" Bailing MoE model configuration """
|
||||||
|
|
||||||
|
from transformers.configuration_utils import PretrainedConfig
|
||||||
|
|
||||||
|
|
||||||
|
class BailingMoeConfig(PretrainedConfig):
    """Configuration container for the Bailing MoE model.

    Every constructor argument except ``pad_token_id`` and
    ``tie_word_embeddings`` is stored unchanged as an attribute of the same
    name. Those two, together with any extra keyword arguments, are forwarded
    to ``PretrainedConfig.__init__``.

    The arguments ``use_qkv_bias``, ``use_bias`` and ``norm_head`` are marked
    "bailing only" by the original authors. ``tie_word_embeddings`` is a
    ``PretrainedConfig`` key whose default value is overridden here.
    """

    model_type = "bailing_moe"

    def __init__(
        self,
        vocab_size=30592,
        hidden_size=1024,
        intermediate_size=None,
        num_hidden_layers=24,
        num_attention_heads=16,
        num_key_value_heads=0,
        hidden_act="silu",
        use_qkv_bias=False,  # bailing only
        use_bias=True,  # bailing only
        rms_norm_eps=1e-05,
        norm_head=False,  # bailing only
        tie_word_embeddings=False,  # PretrainedConfig key; default value changed here
        embedding_dropout=0.1,
        attention_dropout=0.1,
        output_dropout=0.1,
        initializer_range=0.02,
        max_position_embeddings=16384,
        rope_theta=10000.0,
        use_cache=True,
        use_sliding_window=False,
        sliding_window=4096,
        max_window_layers=28,
        rope_scaling=None,
        pad_token_id=126081,
        num_experts=16,
        num_shared_experts=0,
        num_experts_per_tok=2,
        norm_topk_prob=True,
        moe_intermediate_size=None,
        first_k_dense_replace=0,
        head_dim=None,
        output_router_logits=False,
        **kwargs,
    ):
        # --- General model settings (stored as given) ---
        self.num_hidden_layers = num_hidden_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.use_qkv_bias = use_qkv_bias
        self.use_bias = use_bias
        self.norm_head = norm_head
        self.rms_norm_eps = rms_norm_eps
        self.embedding_dropout = embedding_dropout
        self.attention_dropout = attention_dropout
        self.output_dropout = output_dropout
        self.initializer_range = initializer_range
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.use_cache = use_cache
        self.use_sliding_window = use_sliding_window
        self.sliding_window = sliding_window
        self.max_window_layers = max_window_layers
        self.head_dim = head_dim
        self.rope_scaling = rope_scaling

        # --- Mixture-of-experts settings ---
        self.num_experts = num_experts
        self.num_shared_experts = num_shared_experts
        self.num_experts_per_tok = num_experts_per_tok
        self.norm_topk_prob = norm_topk_prob
        self.moe_intermediate_size = moe_intermediate_size
        self.first_k_dense_replace = first_k_dense_replace
        self.output_router_logits = output_router_logits

        # The base-class initializer runs last, after all attributes above are set.
        super().__init__(
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|
||||||
3
model-00001-of-00008.safetensors
Normal file
3
model-00001-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1558955e9b345400a86698efb0aed0835027a1478f0c3673e3b729f374ec80d4
|
||||||
|
size 4797989296
|
||||||
3
model-00002-of-00008.safetensors
Normal file
3
model-00002-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e9102beaac660729a06acd0d503031f44c8b54efe78e8ae946e7a8c0c77f534e
|
||||||
|
size 4430878712
|
||||||
3
model-00003-of-00008.safetensors
Normal file
3
model-00003-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3ee3c16b9bac41425162a62aa298946bfdfc8a1f15c4c46de7a8259c13646f19
|
||||||
|
size 4430879368
|
||||||
3
model-00004-of-00008.safetensors
Normal file
3
model-00004-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:cf0ad65f6a3dfce00b27bdc9fde78cea49d7d7170d89334c7e3ef8947e26b5db
|
||||||
|
size 4430879368
|
||||||
3
model-00005-of-00008.safetensors
Normal file
3
model-00005-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2106a348a52af92256acf871c75b2738d1fe84334d85ab90778345c3170ed2e3
|
||||||
|
size 4948347472
|
||||||
3
model-00006-of-00008.safetensors
Normal file
3
model-00006-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:94d7c8bff84f558288ebcf4bca979da9bb0c237b5115339955cdc5a58e96e120
|
||||||
|
size 4798387616
|
||||||
3
model-00007-of-00008.safetensors
Normal file
3
model-00007-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:791db8ec66503a0350f6d87f1b8ce6443172886db2ee6e7c17f53ffbc3cc9b3c
|
||||||
|
size 4994474080
|
||||||
3
model-00008-of-00008.safetensors
Normal file
3
model-00008-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:44c18a74c6352ff0ac31257d134da955eef7b1b21fe40f9857f8cdc292f6d398
|
||||||
|
size 772809032
|
||||||
5611
model.safetensors.index.json
Normal file
5611
model.safetensors.index.json
Normal file
File diff suppressed because it is too large
Load Diff
1440
modeling_bailing_moe.py
Normal file
1440
modeling_bailing_moe.py
Normal file
File diff suppressed because it is too large
Load Diff
14
special_tokens_map.json
Normal file
14
special_tokens_map.json
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|fim_begin|>",
|
||||||
|
"<|fim_hole|>",
|
||||||
|
"<|fim_end|>",
|
||||||
|
"<|fim_pad|>",
|
||||||
|
"<|repo_name|>",
|
||||||
|
"<|file_sep|>"
|
||||||
|
],
|
||||||
|
"bos_token": "<|startoftext|>",
|
||||||
|
"cls_token": "[CLS]",
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"gmask_token": "[gMASK]"
|
||||||
|
}
|
||||||
254298
tokenizer.json
Normal file
254298
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
23
tokenizer_config.json
Normal file
23
tokenizer_config.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": false,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|fim_begin|>",
|
||||||
|
"<|fim_hole|>",
|
||||||
|
"<|fim_end|>",
|
||||||
|
"<|fim_pad|>",
|
||||||
|
"<|repo_name|>",
|
||||||
|
"<|file_sep|>"
|
||||||
|
],
|
||||||
|
"bos_token": "<|startoftext|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"cls_token": "[CLS]",
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"gmask_token": "[gMASK]",
|
||||||
|
"merges_file": null,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"tokenizer_class": "PreTrainedTokenizerFast",
|
||||||
|
"vocab_file": null,
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"fast_tokenizer": true
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user