559 lines
7.2 KiB
JSON
559 lines
7.2 KiB
JSON
{
|
|
"version": "1.0",
|
|
"truncation": null,
|
|
"padding": null,
|
|
"added_tokens": [
|
|
{
|
|
"id": 128,
|
|
"content": "<pad>",
|
|
"single_word": false,
|
|
"lstrip": false,
|
|
"rstrip": false,
|
|
"normalized": false,
|
|
"special": true
|
|
},
|
|
{
|
|
"id": 129,
|
|
"content": "<s>",
|
|
"single_word": false,
|
|
"lstrip": false,
|
|
"rstrip": false,
|
|
"normalized": false,
|
|
"special": true
|
|
},
|
|
{
|
|
"id": 130,
|
|
"content": "</s>",
|
|
"single_word": false,
|
|
"lstrip": false,
|
|
"rstrip": false,
|
|
"normalized": false,
|
|
"special": true
|
|
},
|
|
{
|
|
"id": 131,
|
|
"content": "<unk>",
|
|
"single_word": false,
|
|
"lstrip": false,
|
|
"rstrip": false,
|
|
"normalized": false,
|
|
"special": true
|
|
}
|
|
],
|
|
"normalizer": null,
|
|
"pre_tokenizer": {
|
|
"type": "Split",
|
|
"pattern": {
|
|
"Regex": "\\(|\\)"
|
|
},
|
|
"behavior": "Isolated",
|
|
"invert": false
|
|
},
|
|
"post_processor": null,
|
|
"decoder": {
|
|
"type": "BPEDecoder",
|
|
"suffix": "</w>"
|
|
},
|
|
"model": {
|
|
"type": "BPE",
|
|
"dropout": null,
|
|
"unk_token": null,
|
|
"continuing_subword_prefix": null,
|
|
"end_of_word_suffix": null,
|
|
"fuse_unk": false,
|
|
"byte_fallback": false,
|
|
"ignore_merges": false,
|
|
"vocab": {
|
|
"#": 0,
|
|
"%": 1,
|
|
"(": 2,
|
|
")": 3,
|
|
"+": 4,
|
|
"-": 5,
|
|
"/": 6,
|
|
"0": 7,
|
|
"1": 8,
|
|
"2": 9,
|
|
"3": 10,
|
|
"4": 11,
|
|
"5": 12,
|
|
"6": 13,
|
|
"7": 14,
|
|
"8": 15,
|
|
"9": 16,
|
|
"=": 17,
|
|
"@": 18,
|
|
"B": 19,
|
|
"C": 20,
|
|
"F": 21,
|
|
"H": 22,
|
|
"I": 23,
|
|
"N": 24,
|
|
"O": 25,
|
|
"P": 26,
|
|
"S": 27,
|
|
"[": 28,
|
|
"\\": 29,
|
|
"]": 30,
|
|
"c": 31,
|
|
"i": 32,
|
|
"l": 33,
|
|
"n": 34,
|
|
"o": 35,
|
|
"r": 36,
|
|
"s": 37,
|
|
"cc": 38,
|
|
"CC": 39,
|
|
"c1": 40,
|
|
"=O": 41,
|
|
"c2": 42,
|
|
"H]": 43,
|
|
"[C": 44,
|
|
"[C@": 45,
|
|
"c1cc": 46,
|
|
"[C@@": 47,
|
|
"c3": 48,
|
|
"c2cc": 49,
|
|
"[C@H]": 50,
|
|
"[C@@H]": 51,
|
|
"NC": 52,
|
|
"c1ccc": 53,
|
|
"CCC": 54,
|
|
"CO": 55,
|
|
"cc1": 56,
|
|
"=C": 57,
|
|
"c1cccc": 58,
|
|
"n1": 59,
|
|
"N1": 60,
|
|
"nc": 61,
|
|
"c2cccc": 62,
|
|
"OC": 63,
|
|
"c3cc": 64,
|
|
"Cl": 65,
|
|
"C1": 66,
|
|
"N2": 67,
|
|
"CCN": 68,
|
|
"CC1": 69,
|
|
"c2ccccc2": 70,
|
|
"c2ccc": 71,
|
|
"n2": 72,
|
|
"O=C": 73,
|
|
"c1ccccc1": 74,
|
|
"C2": 75,
|
|
"CC2": 76,
|
|
"CN": 77,
|
|
"cc2": 78,
|
|
"CCO": 79,
|
|
"[C@@H]1": 80,
|
|
"C[C@H]": 81,
|
|
"c3cccc": 82,
|
|
"[n": 83,
|
|
"[nH]": 84,
|
|
"c1n": 85,
|
|
"cn": 86,
|
|
"c4": 87,
|
|
"[C@@H]2": 88,
|
|
"[C@H]1": 89,
|
|
"c3ccccc3": 90,
|
|
"Cc1ccc": 91,
|
|
"CCCC": 92,
|
|
"c2c": 93,
|
|
"[C@H]2": 94,
|
|
"COc1ccc": 95,
|
|
"/C": 96,
|
|
"c2n": 97,
|
|
"C[C@@H]": 98,
|
|
"Cc1cc": 99,
|
|
"c1c": 100,
|
|
"c3ccc": 101,
|
|
"CNC": 102,
|
|
"cccc": 103,
|
|
"n3": 104,
|
|
"CS": 105,
|
|
"nc1": 106,
|
|
"COC": 107,
|
|
"+]": 108,
|
|
"Br": 109,
|
|
"cc3": 110,
|
|
"N1CCC": 111,
|
|
"C3": 112,
|
|
"[N": 113,
|
|
"[N+]": 114,
|
|
"-]": 115,
|
|
"[O": 116,
|
|
"[O-]": 117,
|
|
"s1": 118,
|
|
"c1nc": 119,
|
|
"nc2": 120,
|
|
"N1C": 121,
|
|
"CCOC": 122,
|
|
"o1": 123,
|
|
"CCCCC": 124,
|
|
"CC3": 125,
|
|
"CCCN": 126,
|
|
"[C@]": 127
|
|
},
|
|
"merges": [
|
|
[
|
|
"c",
|
|
"c"
|
|
],
|
|
[
|
|
"C",
|
|
"C"
|
|
],
|
|
[
|
|
"c",
|
|
"1"
|
|
],
|
|
[
|
|
"=",
|
|
"O"
|
|
],
|
|
[
|
|
"c",
|
|
"2"
|
|
],
|
|
[
|
|
"H",
|
|
"]"
|
|
],
|
|
[
|
|
"[",
|
|
"C"
|
|
],
|
|
[
|
|
"[C",
|
|
"@"
|
|
],
|
|
[
|
|
"c1",
|
|
"cc"
|
|
],
|
|
[
|
|
"[C@",
|
|
"@"
|
|
],
|
|
[
|
|
"c",
|
|
"3"
|
|
],
|
|
[
|
|
"c2",
|
|
"cc"
|
|
],
|
|
[
|
|
"[C@",
|
|
"H]"
|
|
],
|
|
[
|
|
"[C@@",
|
|
"H]"
|
|
],
|
|
[
|
|
"N",
|
|
"C"
|
|
],
|
|
[
|
|
"c1cc",
|
|
"c"
|
|
],
|
|
[
|
|
"CC",
|
|
"C"
|
|
],
|
|
[
|
|
"C",
|
|
"O"
|
|
],
|
|
[
|
|
"cc",
|
|
"1"
|
|
],
|
|
[
|
|
"=",
|
|
"C"
|
|
],
|
|
[
|
|
"c1cc",
|
|
"cc"
|
|
],
|
|
[
|
|
"n",
|
|
"1"
|
|
],
|
|
[
|
|
"N",
|
|
"1"
|
|
],
|
|
[
|
|
"n",
|
|
"c"
|
|
],
|
|
[
|
|
"c2cc",
|
|
"cc"
|
|
],
|
|
[
|
|
"O",
|
|
"C"
|
|
],
|
|
[
|
|
"c3",
|
|
"cc"
|
|
],
|
|
[
|
|
"C",
|
|
"l"
|
|
],
|
|
[
|
|
"C",
|
|
"1"
|
|
],
|
|
[
|
|
"N",
|
|
"2"
|
|
],
|
|
[
|
|
"CC",
|
|
"N"
|
|
],
|
|
[
|
|
"CC",
|
|
"1"
|
|
],
|
|
[
|
|
"c2cccc",
|
|
"c2"
|
|
],
|
|
[
|
|
"c2cc",
|
|
"c"
|
|
],
|
|
[
|
|
"n",
|
|
"2"
|
|
],
|
|
[
|
|
"O",
|
|
"=C"
|
|
],
|
|
[
|
|
"c1cccc",
|
|
"c1"
|
|
],
|
|
[
|
|
"C",
|
|
"2"
|
|
],
|
|
[
|
|
"CC",
|
|
"2"
|
|
],
|
|
[
|
|
"C",
|
|
"N"
|
|
],
|
|
[
|
|
"cc",
|
|
"2"
|
|
],
|
|
[
|
|
"CC",
|
|
"O"
|
|
],
|
|
[
|
|
"[C@@H]",
|
|
"1"
|
|
],
|
|
[
|
|
"C",
|
|
"[C@H]"
|
|
],
|
|
[
|
|
"c3cc",
|
|
"cc"
|
|
],
|
|
[
|
|
"[",
|
|
"n"
|
|
],
|
|
[
|
|
"[n",
|
|
"H]"
|
|
],
|
|
[
|
|
"c1",
|
|
"n"
|
|
],
|
|
[
|
|
"c",
|
|
"n"
|
|
],
|
|
[
|
|
"c",
|
|
"4"
|
|
],
|
|
[
|
|
"[C@@H]",
|
|
"2"
|
|
],
|
|
[
|
|
"[C@H]",
|
|
"1"
|
|
],
|
|
[
|
|
"c3cccc",
|
|
"c3"
|
|
],
|
|
[
|
|
"C",
|
|
"c1ccc"
|
|
],
|
|
[
|
|
"CC",
|
|
"CC"
|
|
],
|
|
[
|
|
"c2",
|
|
"c"
|
|
],
|
|
[
|
|
"[C@H]",
|
|
"2"
|
|
],
|
|
[
|
|
"CO",
|
|
"c1ccc"
|
|
],
|
|
[
|
|
"/",
|
|
"C"
|
|
],
|
|
[
|
|
"c2",
|
|
"n"
|
|
],
|
|
[
|
|
"C",
|
|
"[C@@H]"
|
|
],
|
|
[
|
|
"C",
|
|
"c1cc"
|
|
],
|
|
[
|
|
"c1",
|
|
"c"
|
|
],
|
|
[
|
|
"c3cc",
|
|
"c"
|
|
],
|
|
[
|
|
"C",
|
|
"NC"
|
|
],
|
|
[
|
|
"cc",
|
|
"cc"
|
|
],
|
|
[
|
|
"n",
|
|
"3"
|
|
],
|
|
[
|
|
"C",
|
|
"S"
|
|
],
|
|
[
|
|
"n",
|
|
"c1"
|
|
],
|
|
[
|
|
"CO",
|
|
"C"
|
|
],
|
|
[
|
|
"+",
|
|
"]"
|
|
],
|
|
[
|
|
"B",
|
|
"r"
|
|
],
|
|
[
|
|
"cc",
|
|
"3"
|
|
],
|
|
[
|
|
"N1",
|
|
"CCC"
|
|
],
|
|
[
|
|
"C",
|
|
"3"
|
|
],
|
|
[
|
|
"[",
|
|
"N"
|
|
],
|
|
[
|
|
"[N",
|
|
"+]"
|
|
],
|
|
[
|
|
"-",
|
|
"]"
|
|
],
|
|
[
|
|
"[",
|
|
"O"
|
|
],
|
|
[
|
|
"[O",
|
|
"-]"
|
|
],
|
|
[
|
|
"s",
|
|
"1"
|
|
],
|
|
[
|
|
"c1",
|
|
"nc"
|
|
],
|
|
[
|
|
"n",
|
|
"c2"
|
|
],
|
|
[
|
|
"N1",
|
|
"C"
|
|
],
|
|
[
|
|
"CC",
|
|
"OC"
|
|
],
|
|
[
|
|
"o",
|
|
"1"
|
|
],
|
|
[
|
|
"CC",
|
|
"CCC"
|
|
],
|
|
[
|
|
"CC",
|
|
"3"
|
|
],
|
|
[
|
|
"CCC",
|
|
"N"
|
|
],
|
|
[
|
|
"[C@",
|
|
"]"
|
|
]
|
|
]
|
|
}
|
|
} |