239 lines
3.4 KiB
JSON
239 lines
3.4 KiB
JSON
|
|
{
|
||
|
|
"version": "1.0",
|
||
|
|
"truncation": null,
|
||
|
|
"padding": null,
|
||
|
|
"added_tokens": [
|
||
|
|
{
|
||
|
|
"id": 64,
|
||
|
|
"content": "<pad>",
|
||
|
|
"single_word": false,
|
||
|
|
"lstrip": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": 65,
|
||
|
|
"content": "<s>",
|
||
|
|
"single_word": false,
|
||
|
|
"lstrip": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": 66,
|
||
|
|
"content": "</s>",
|
||
|
|
"single_word": false,
|
||
|
|
"lstrip": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"special": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"id": 67,
|
||
|
|
"content": "<unk>",
|
||
|
|
"single_word": false,
|
||
|
|
"lstrip": false,
|
||
|
|
"rstrip": false,
|
||
|
|
"normalized": false,
|
||
|
|
"special": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"normalizer": null,
|
||
|
|
"pre_tokenizer": {
|
||
|
|
"type": "Split",
|
||
|
|
"pattern": {
|
||
|
|
"Regex": "\\(|\\)"
|
||
|
|
},
|
||
|
|
"behavior": "Isolated",
|
||
|
|
"invert": false
|
||
|
|
},
|
||
|
|
"post_processor": null,
|
||
|
|
"decoder": {
|
||
|
|
"type": "BPEDecoder",
|
||
|
|
"suffix": "</w>"
|
||
|
|
},
|
||
|
|
"model": {
|
||
|
|
"type": "BPE",
|
||
|
|
"dropout": null,
|
||
|
|
"unk_token": null,
|
||
|
|
"continuing_subword_prefix": null,
|
||
|
|
"end_of_word_suffix": null,
|
||
|
|
"fuse_unk": false,
|
||
|
|
"byte_fallback": false,
|
||
|
|
"ignore_merges": false,
|
||
|
|
"vocab": {
|
||
|
|
"#": 0,
|
||
|
|
"%": 1,
|
||
|
|
"(": 2,
|
||
|
|
")": 3,
|
||
|
|
"+": 4,
|
||
|
|
"-": 5,
|
||
|
|
"/": 6,
|
||
|
|
"0": 7,
|
||
|
|
"1": 8,
|
||
|
|
"2": 9,
|
||
|
|
"3": 10,
|
||
|
|
"4": 11,
|
||
|
|
"5": 12,
|
||
|
|
"6": 13,
|
||
|
|
"7": 14,
|
||
|
|
"8": 15,
|
||
|
|
"9": 16,
|
||
|
|
"=": 17,
|
||
|
|
"@": 18,
|
||
|
|
"B": 19,
|
||
|
|
"C": 20,
|
||
|
|
"F": 21,
|
||
|
|
"H": 22,
|
||
|
|
"I": 23,
|
||
|
|
"N": 24,
|
||
|
|
"O": 25,
|
||
|
|
"P": 26,
|
||
|
|
"S": 27,
|
||
|
|
"[": 28,
|
||
|
|
"\\": 29,
|
||
|
|
"]": 30,
|
||
|
|
"c": 31,
|
||
|
|
"i": 32,
|
||
|
|
"l": 33,
|
||
|
|
"n": 34,
|
||
|
|
"o": 35,
|
||
|
|
"r": 36,
|
||
|
|
"s": 37,
|
||
|
|
"cc": 38,
|
||
|
|
"CC": 39,
|
||
|
|
"c1": 40,
|
||
|
|
"=O": 41,
|
||
|
|
"c2": 42,
|
||
|
|
"H]": 43,
|
||
|
|
"[C": 44,
|
||
|
|
"[C@": 45,
|
||
|
|
"c1cc": 46,
|
||
|
|
"[C@@": 47,
|
||
|
|
"c3": 48,
|
||
|
|
"c2cc": 49,
|
||
|
|
"[C@H]": 50,
|
||
|
|
"[C@@H]": 51,
|
||
|
|
"NC": 52,
|
||
|
|
"c1ccc": 53,
|
||
|
|
"CCC": 54,
|
||
|
|
"CO": 55,
|
||
|
|
"cc1": 56,
|
||
|
|
"=C": 57,
|
||
|
|
"c1cccc": 58,
|
||
|
|
"n1": 59,
|
||
|
|
"N1": 60,
|
||
|
|
"nc": 61,
|
||
|
|
"c2cccc": 62,
|
||
|
|
"OC": 63
|
||
|
|
},
|
||
|
|
"merges": [
|
||
|
|
[
|
||
|
|
"c",
|
||
|
|
"c"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"C",
|
||
|
|
"C"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c",
|
||
|
|
"1"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"=",
|
||
|
|
"O"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c",
|
||
|
|
"2"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"H",
|
||
|
|
"]"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"[",
|
||
|
|
"C"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"[C",
|
||
|
|
"@"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c1",
|
||
|
|
"cc"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"[C@",
|
||
|
|
"@"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c",
|
||
|
|
"3"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c2",
|
||
|
|
"cc"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"[C@",
|
||
|
|
"H]"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"[C@@",
|
||
|
|
"H]"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"N",
|
||
|
|
"C"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c1cc",
|
||
|
|
"c"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"CC",
|
||
|
|
"C"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"C",
|
||
|
|
"O"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"cc",
|
||
|
|
"1"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"=",
|
||
|
|
"C"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c1cc",
|
||
|
|
"cc"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"n",
|
||
|
|
"1"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"N",
|
||
|
|
"1"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"n",
|
||
|
|
"c"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"c2cc",
|
||
|
|
"cc"
|
||
|
|
],
|
||
|
|
[
|
||
|
|
"O",
|
||
|
|
"C"
|
||
|
|
]
|
||
|
|
]
|
||
|
|
}
|
||
|
|
}
|