{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 64,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 65,
      "content": "<s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 66,
      "content": "</s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 67,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": "\\(|\\)|(?<=\\d)(?=[A-Z\\[\\(])"
    },
    "behavior": "MergedWithPrevious",
    "invert": false
  },
  "post_processor": null,
  "decoder": {
    "type": "BPEDecoder",
    "suffix": "</w>"
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": null,
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "#": 0,
      "%": 1,
      "(": 2,
      ")": 3,
      "+": 4,
      "-": 5,
      "/": 6,
      "0": 7,
      "1": 8,
      "2": 9,
      "3": 10,
      "4": 11,
      "5": 12,
      "6": 13,
      "7": 14,
      "8": 15,
      "9": 16,
      "=": 17,
      "@": 18,
      "B": 19,
      "C": 20,
      "F": 21,
      "H": 22,
      "I": 23,
      "N": 24,
      "O": 25,
      "P": 26,
      "S": 27,
      "[": 28,
      "\\": 29,
      "]": 30,
      "c": 31,
      "i": 32,
      "l": 33,
      "n": 34,
      "o": 35,
      "r": 36,
      "s": 37,
      "cc": 38,
      "CC": 39,
      "O)": 40,
      "c1": 41,
      "=O)": 42,
      "C(": 43,
      "c(": 44,
      "c2": 45,
      "H]": 46,
      "[C": 47,
      "[C@": 48,
      "C)": 49,
      "c1cc": 50,
      "[C@@": 51,
      "c3": 52,
      "c2cc": 53,
      "2)": 54,
      "[C@H]": 55,
      "[C@@H]": 56,
      "N(": 57,
      "c1ccc(": 58,
      "CO": 59,
      "F)": 60,
      "cc1": 61,
      "NC(": 62,
      "C1": 63
    },
    "merges": [
      [
        "c",
        "c"
      ],
      [
        "C",
        "C"
      ],
      [
        "O",
        ")"
      ],
      [
        "c",
        "1"
      ],
      [
        "=",
        "O)"
      ],
      [
        "C",
        "("
      ],
      [
        "c",
        "("
      ],
      [
        "c",
        "2"
      ],
      [
        "H",
        "]"
      ],
      [
        "[",
        "C"
      ],
      [
        "[C",
        "@"
      ],
      [
        "C",
        ")"
      ],
      [
        "c1",
        "cc"
      ],
      [
        "[C@",
        "@"
      ],
      [
        "c",
        "3"
      ],
      [
        "c2",
        "cc"
      ],
      [
        "2",
        ")"
      ],
      [
        "[C@",
        "H]"
      ],
      [
        "[C@@",
        "H]"
      ],
      [
        "N",
        "("
      ],
      [
        "c1cc",
        "c("
      ],
      [
        "C",
        "O"
      ],
      [
        "F",
        ")"
      ],
      [
        "cc",
        "1"
      ],
      [
        "N",
        "C("
      ],
      [
        "C",
        "1"
      ]
    ]
  }
}