{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 256, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 257, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 258, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 259, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "\\(|\\)" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "#": 0, "%": 1, "(": 2, ")": 3, "+": 4, "-": 5, "/": 6, "0": 7, "1": 8, "2": 9, "3": 10, "4": 11, "5": 12, "6": 13, "7": 14, "8": 15, "9": 16, "=": 17, "@": 18, "B": 19, "C": 20, "F": 21, "H": 22, "I": 23, "N": 24, "O": 25, "P": 26, "S": 27, "[": 28, "\\": 29, "]": 30, "c": 31, "i": 32, "l": 33, "n": 34, "o": 35, "r": 36, "s": 37, "cc": 38, "CC": 39, "c1": 40, "=O": 41, "c2": 42, "H]": 43, "[C": 44, "[C@": 45, "c1cc": 46, "[C@@": 47, "c3": 48, "c2cc": 49, "[C@H]": 50, "[C@@H]": 51, "NC": 52, "c1ccc": 53, "CCC": 54, "CO": 55, "cc1": 56, "=C": 57, "c1cccc": 58, "n1": 59, "N1": 60, "nc": 61, "c2cccc": 62, "OC": 63, "c3cc": 64, "Cl": 65, "C1": 66, "N2": 67, "CCN": 68, "CC1": 69, "c2ccccc2": 70, "c2ccc": 71, "n2": 72, "O=C": 73, "c1ccccc1": 74, "C2": 75, "CC2": 76, "CN": 77, "cc2": 78, "CCO": 79, "[C@@H]1": 80, "C[C@H]": 81, "c3cccc": 82, "[n": 83, "[nH]": 84, "c1n": 85, "cn": 86, "c4": 87, "[C@@H]2": 88, "[C@H]1": 89, "c3ccccc3": 90, "Cc1ccc": 91, "CCCC": 92, "c2c": 93, "[C@H]2": 94, "COc1ccc": 95, "/C": 96, "c2n": 97, "C[C@@H]": 98, "Cc1cc": 99, "c1c": 100, "c3ccc": 101, "CNC": 102, "cccc": 103, "n3": 104, "CS": 105, "nc1": 106, "COC": 107, "+]": 108, "Br": 109, "cc3": 110, "N1CCC": 111, "C3": 112, "[N": 113, "[N+]": 114, "-]": 115, "[O": 116, "[O-]": 117, "s1": 118, "c1nc": 119, "nc2": 120, "N1C": 121, "CCOC": 122, "o1": 123, "CCCCC": 124, "CC3": 125, "CCCN": 126, "[C@]": 127, "ccc1": 128, "[C@@]": 129, "OCC": 130, "Cn1": 131, "c3c": 132, "N3": 133, "N1CC": 134, "Cc1n": 135, "ccc": 136, "NCC": 137, "CN2": 138, "N1CCN": 139, "c2nc": 140, "c1C": 141, "=S": 142, "=N": 143, "C[C@@H]1": 144, "COc1cc": 145, "Nc1ccc": 146, "/C=C": 147, "o2": 148, "CCCO": 149, "C#": 150, "C[C@H]1": 151, "Cc1ccccc1": 152, "ccc2": 153, "s2": 154, "[C@H]3": 155, "[C@@H]3": 156, "N2CCC": 157, "Cc1cccc": 158, "c4cccc": 159, "c1cn": 160, "oc": 161, "CCNC": 162, "sc": 163, "nn": 164, "N2C": 165, "-c2ccc": 166, "C#N": 167, "N2CC": 168, "Cc1c": 169, "CCS": 170, "CCn1": 171, "C1=O": 172, "C2=O": 173, "cc1C": 174, "[nH]c": 175, "O=C1": 176, "-c2ccccc2": 177, "N=C": 178, "Cc2ccccc2": 179, "c12": 180, "N2CCN": 181, "CC[C@H]": 182, "CC[C@@H]": 183, "c4ccccc4": 184, "C[C@H]2": 185, "c1=O": 186, "NS": 187, "Cc1nc": 188, "c1ccc2c": 189, "NC1": 190, "COc1cccc": 191, "Nc2ccc": 192, "c3n": 193, "cn1": 194, "[nH]1": 195, "C=": 196, "COc1ccccc1": 197, "c2ccccc21": 198, "c2cn": 199, "N[C@H]": 200, "N[C@@H]": 201, "c1nnc": 202, "c2c1": 203, "CCOc1ccc": 204, "SCC": 205, "CCOCC2": 206, "OCO": 207, "c3nc": 208, "c1ncc": 209, "Cc2ccc": 210, "cc1OC": 211, "NC2": 212, "nc3": 213, "CCCCC2": 214, "C=C": 215, "N1CCO": 216, "nn1": 217, "[C@]1": 218, "CCCC2": 219, "-n2": 220, "c2ncc": 221, "C[C@@H]2": 222, "cc1Cl": 223, "Nc1cccc": 224, "c2nnc": 225, "[C@@]1": 226, "CCCC3": 227, "CCCCC1": 228, "c2ccc3c": 229, "-c3ccccc3": 230, "nn2": 231, "c4ccc": 232, "CCC2": 233, "no1": 234, "OCCO": 235, "O1": 236, "Cn2": 237, "SC": 238, "-c3ccc": 239, "Nc1cc": 240, "/C=C/": 241, "Cn1cc": 242, "N[C@@H]1": 243, "c23": 244, "Nc1ccccc1": 245, "N#": 246, "CN1C": 247, "[C@@]2": 248, "[C@]2": 249, "C2CC2": 250, "c2C": 251, "o3": 252, "NCCC": 253, "N2CCOCC2": 254, "N[C@H]1": 255 }, "merges": [ [ "c", "c" ], [ "C", "C" ], [ "c", "1" ], [ "=", "O" ], [ "c", "2" ], [ "H", "]" ], [ "[", "C" ], [ "[C", "@" ], [ "c1", "cc" ], [ "[C@", "@" ], [ "c", "3" ], [ "c2", "cc" ], [ "[C@", "H]" ], [ "[C@@", "H]" ], [ "N", "C" ], [ "c1cc", "c" ], [ "CC", "C" ], [ "C", "O" ], [ "cc", "1" ], [ "=", "C" ], [ "c1cc", "cc" ], [ "n", "1" ], [ "N", "1" ], [ "n", "c" ], [ "c2cc", "cc" ], [ "O", "C" ], [ "c3", "cc" ], [ "C", "l" ], [ "C", "1" ], [ "N", "2" ], [ "CC", "N" ], [ "CC", "1" ], [ "c2cccc", "c2" ], [ "c2cc", "c" ], [ "n", "2" ], [ "O", "=C" ], [ "c1cccc", "c1" ], [ "C", "2" ], [ "CC", "2" ], [ "C", "N" ], [ "cc", "2" ], [ "CC", "O" ], [ "[C@@H]", "1" ], [ "C", "[C@H]" ], [ "c3cc", "cc" ], [ "[", "n" ], [ "[n", "H]" ], [ "c1", "n" ], [ "c", "n" ], [ "c", "4" ], [ "[C@@H]", "2" ], [ "[C@H]", "1" ], [ "c3cccc", "c3" ], [ "C", "c1ccc" ], [ "CC", "CC" ], [ "c2", "c" ], [ "[C@H]", "2" ], [ "CO", "c1ccc" ], [ "/", "C" ], [ "c2", "n" ], [ "C", "[C@@H]" ], [ "C", "c1cc" ], [ "c1", "c" ], [ "c3cc", "c" ], [ "C", "NC" ], [ "cc", "cc" ], [ "n", "3" ], [ "C", "S" ], [ "n", "c1" ], [ "CO", "C" ], [ "+", "]" ], [ "B", "r" ], [ "cc", "3" ], [ "N1", "CCC" ], [ "C", "3" ], [ "[", "N" ], [ "[N", "+]" ], [ "-", "]" ], [ "[", "O" ], [ "[O", "-]" ], [ "s", "1" ], [ "c1", "nc" ], [ "n", "c2" ], [ "N1", "C" ], [ "CC", "OC" ], [ "o", "1" ], [ "CC", "CCC" ], [ "CC", "3" ], [ "CCC", "N" ], [ "[C@", "]" ], [ "cc", "c1" ], [ "[C@@", "]" ], [ "O", "CC" ], [ "C", "n1" ], [ "c3", "c" ], [ "N", "3" ], [ "N1", "CC" ], [ "C", "c1n" ], [ "cc", "c" ], [ "N", "CC" ], [ "C", "N2" ], [ "N1", "CCN" ], [ "c2", "nc" ], [ "c1", "C" ], [ "=", "S" ], [ "=", "N" ], [ "C", "[C@@H]1" ], [ "CO", "c1cc" ], [ "N", "c1ccc" ], [ "/C", "=C" ], [ "o", "2" ], [ "CCC", "O" ], [ "C", "#" ], [ "C[C@H]", "1" ], [ "C", "c1ccccc1" ], [ "cc", "c2" ], [ "s", "2" ], [ "[C@H]", "3" ], [ "[C@@H]", "3" ], [ "N2", "CCC" ], [ "C", "c1cccc" ], [ "c4", "cccc" ], [ "c1", "cn" ], [ "o", "c" ], [ "CC", "NC" ], [ "s", "c" ], [ "n", "n" ], [ "N2", "C" ], [ "-", "c2ccc" ], [ "C#", "N" ], [ "N2", "CC" ], [ "C", "c1c" ], [ "CC", "S" ], [ "CC", "n1" ], [ "C1", "=O" ], [ "C2", "=O" ], [ "cc1", "C" ], [ "[nH]", "c" ], [ "O=C", "1" ], [ "-", "c2ccccc2" ], [ "N", "=C" ], [ "C", "c2ccccc2" ], [ "c1", "2" ], [ "N2", "CCN" ], [ "CC", "[C@H]" ], [ "CC", "[C@@H]" ], [ "c4cccc", "c4" ], [ "C[C@H]", "2" ], [ "c1", "=O" ], [ "N", "S" ], [ "C", "c1nc" ], [ "c1cc", "c2c" ], [ "NC", "1" ], [ "CO", "c1cccc" ], [ "N", "c2ccc" ], [ "c3", "n" ], [ "c", "n1" ], [ "[nH]", "1" ], [ "C", "=" ], [ "CO", "c1ccccc1" ], [ "c2ccccc2", "1" ], [ "c2", "cn" ], [ "N", "[C@H]" ], [ "N", "[C@@H]" ], [ "c1n", "nc" ], [ "c2", "c1" ], [ "CCO", "c1ccc" ], [ "S", "CC" ], [ "CCO", "CC2" ], [ "O", "CO" ], [ "c3", "nc" ], [ "c1n", "cc" ], [ "C", "c2ccc" ], [ "cc1", "OC" ], [ "NC", "2" ], [ "n", "c3" ], [ "CCCCC", "2" ], [ "C", "=C" ], [ "N1", "CCO" ], [ "n", "n1" ], [ "[C@]", "1" ], [ "CC", "CC2" ], [ "-", "n2" ], [ "c2n", "cc" ], [ "C", "[C@@H]2" ], [ "cc1", "Cl" ], [ "N", "c1cccc" ], [ "c2n", "nc" ], [ "[C@@]", "1" ], [ "CCCC", "3" ], [ "CCCCC", "1" ], [ "c2cc", "c3c" ], [ "-", "c3ccccc3" ], [ "n", "n2" ], [ "c4", "ccc" ], [ "CCC", "2" ], [ "n", "o1" ], [ "O", "CCO" ], [ "O", "1" ], [ "C", "n2" ], [ "S", "C" ], [ "-", "c3ccc" ], [ "N", "c1cc" ], [ "/C=C", "/" ], [ "Cn1", "cc" ], [ "N", "[C@@H]1" ], [ "c2", "3" ], [ "N", "c1ccccc1" ], [ "N", "#" ], [ "C", "N1C" ], [ "[C@@]", "2" ], [ "[C@]", "2" ], [ "C2", "CC2" ], [ "c2", "C" ], [ "o", "3" ], [ "N", "CCC" ], [ "N2", "CCOCC2" ], [ "N", "[C@H]1" ] ] } }