{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 33,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 34,
      "content": "[EOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 35,
      "content": "[BOS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|\\+|\\/|:|@|\\?|>|\\*|\\$|%[0-9]{2}|[0-9])"
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": {
    "type": "Fuse"
  },
  "model": {
    "type": "WordPiece",
    "unk_token": "[UNK]",
    "continuing_subword_prefix": "##",
    "max_input_chars_per_word": 100,
    "vocab": {
      "#": 0,
      "=": 1,
      "-": 2,
      "(": 3,
      ")": 4,
      "1": 5,
      "2": 6,
      "3": 7,
      "4": 8,
      "5": 9,
      "6": 10,
      "7": 11,
      "8": 12,
      "9": 13,
      "%10": 14,
      "Br": 15,
      "C": 16,
      "Cl": 17,
      "F": 18,
      "N": 19,
      "O": 20,
      "S": 21,
      "[N+]": 22,
      "[N-]": 23,
      "[O-]": 24,
      "[S+]": 25,
      "[n+]": 26,
      "[nH]": 27,
      "c": 28,
      "n": 29,
      "o": 30,
      "s": 31,
      "[UNK]": 32
    }
  }
}