446 lines
9.7 KiB
JSON
446 lines
9.7 KiB
JSON
|
|
{
|
||
|
|
"add_bos_token": true,
|
||
|
|
"add_prefix_space": false,
|
||
|
|
"added_tokens_decoder": {
|
||
|
|
"50256": {
|
||
|
|
"content": "<|endoftext|>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50257": {
|
||
|
|
"content": "<||bos||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50258": {
|
||
|
|
"content": "<||pad||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50259": {
|
||
|
|
"content": "<||unk||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50260": {
|
||
|
|
"content": "<||unused1||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50261": {
|
||
|
|
"content": "<||unused2||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50262": {
|
||
|
|
"content": "<||unused3||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50263": {
|
||
|
|
"content": "<||unused4||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50264": {
|
||
|
|
"content": "<||unused5||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50265": {
|
||
|
|
"content": "<||unused6||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50266": {
|
||
|
|
"content": "<||unused7||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50267": {
|
||
|
|
"content": "<||unused8||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50268": {
|
||
|
|
"content": "<||unused9||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50269": {
|
||
|
|
"content": "<||unused10||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50270": {
|
||
|
|
"content": "<||unused11||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50271": {
|
||
|
|
"content": "<||unused12||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50272": {
|
||
|
|
"content": "<||unused13||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50273": {
|
||
|
|
"content": "<||unused14||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50274": {
|
||
|
|
"content": "<||unused15||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50275": {
|
||
|
|
"content": "<||unused16||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50276": {
|
||
|
|
"content": "<||unused17||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50277": {
|
||
|
|
"content": "<||unused18||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50278": {
|
||
|
|
"content": "<||unused19||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50279": {
|
||
|
|
"content": "<||unused20||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50280": {
|
||
|
|
"content": "<||unused21||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50281": {
|
||
|
|
"content": "<||unused22||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50282": {
|
||
|
|
"content": "<||unused23||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50283": {
|
||
|
|
"content": "<||unused24||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50284": {
|
||
|
|
"content": "<||unused25||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50285": {
|
||
|
|
"content": "<||unused26||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50286": {
|
||
|
|
"content": "<||unused27||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50287": {
|
||
|
|
"content": "<||unused28||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50288": {
|
||
|
|
"content": "<||unused29||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50289": {
|
||
|
|
"content": "<||unused30||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50290": {
|
||
|
|
"content": "<||unused31||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50291": {
|
||
|
|
"content": "<||unused32||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50292": {
|
||
|
|
"content": "<||unused33||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50293": {
|
||
|
|
"content": "<||unused34||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50294": {
|
||
|
|
"content": "<||unused35||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50295": {
|
||
|
|
"content": "<||unused36||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50296": {
|
||
|
|
"content": "<||unused37||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50297": {
|
||
|
|
"content": "<||unused38||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50298": {
|
||
|
|
"content": "<||unused39||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50299": {
|
||
|
|
"content": "<||unused40||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50300": {
|
||
|
|
"content": "<||unused41||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50301": {
|
||
|
|
"content": "<||unused42||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50302": {
|
||
|
|
"content": "<||unused43||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
"50303": {
|
||
|
|
"content": "<||unused44||>",
|
||
|
|
"lstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"single_word": false,
|
||
|
|
"special": true
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"additional_special_tokens": [
|
||
|
|
"<||unused1||>",
|
||
|
|
"<||unused2||>",
|
||
|
|
"<||unused3||>",
|
||
|
|
"<||unused4||>",
|
||
|
|
"<||unused5||>",
|
||
|
|
"<||unused6||>",
|
||
|
|
"<||unused7||>",
|
||
|
|
"<||unused8||>",
|
||
|
|
"<||unused9||>",
|
||
|
|
"<||unused10||>",
|
||
|
|
"<||unused11||>",
|
||
|
|
"<||unused12||>",
|
||
|
|
"<||unused13||>",
|
||
|
|
"<||unused14||>",
|
||
|
|
"<||unused15||>",
|
||
|
|
"<||unused16||>",
|
||
|
|
"<||unused17||>",
|
||
|
|
"<||unused18||>",
|
||
|
|
"<||unused19||>",
|
||
|
|
"<||unused20||>",
|
||
|
|
"<||unused21||>",
|
||
|
|
"<||unused22||>",
|
||
|
|
"<||unused23||>",
|
||
|
|
"<||unused24||>",
|
||
|
|
"<||unused25||>",
|
||
|
|
"<||unused26||>",
|
||
|
|
"<||unused27||>",
|
||
|
|
"<||unused28||>",
|
||
|
|
"<||unused29||>",
|
||
|
|
"<||unused30||>",
|
||
|
|
"<||unused31||>",
|
||
|
|
"<||unused32||>",
|
||
|
|
"<||unused33||>",
|
||
|
|
"<||unused34||>",
|
||
|
|
"<||unused35||>",
|
||
|
|
"<||unused36||>",
|
||
|
|
"<||unused37||>",
|
||
|
|
"<||unused38||>",
|
||
|
|
"<||unused39||>",
|
||
|
|
"<||unused40||>",
|
||
|
|
"<||unused41||>",
|
||
|
|
"<||unused42||>",
|
||
|
|
"<||unused43||>",
|
||
|
|
"<||unused44||>"
|
||
|
|
],
|
||
|
|
"bos_token": "<||bos||>",
|
||
|
|
"clean_up_tokenization_spaces": true,
|
||
|
|
"eos_token": "<|endoftext|>",
|
||
|
|
"errors": "replace",
|
||
|
|
"model_max_length": 8192,
|
||
|
|
"pad_token": "<||pad||>",
|
||
|
|
"padding_side": "right",
|
||
|
|
"tokenizer_class": "GPT2Tokenizer",
|
||
|
|
"unk_token": "<||unk||>"
|
||
|
|
}
|