Files
gpt2-medium-indonesian/vocab.json

1 line
1.0 MiB
JSON
Raw Permalink Normal View History

{"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "<mask>": 4, "!": 5, "\"": 6, "#": 7, "$": 8, "%": 9, "&": 10, "'": 11, "(": 12, ")": 13, "*": 14, "+": 15, ",": 16, "-": 17, ".": 18, "/": 19, "0": 20, "1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26, "7": 27, "8": 28, "9": 29, ":": 30, ";": 31, "<": 32, "=": 33, ">": 34, "?": 35, "@": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "\\": 64, "]": 65, "^": 66, "_": 67, "`": 68, "a": 69, "b": 70, "c": 71, "d": 72, "e": 73, "f": 74, "g": 75, "h": 76, "i": 77, "j": 78, "k": 79, "l": 80, "m": 81, "n": 82, "o": 83, "p": 84, "q": 85, "r": 86, "s": 87, "t": 88, "u": 89, "v": 90, "w": 91, "x": 92, "y": 93, "z": 94, "{": 95, "|": 96, "}": 97, "~": 98, "\u00a1": 99, "\u00a2": 100, "\u00a3": 101, "\u00a4": 102, "\u00a5": 103, "\u00a6": 104, "\u00a7": 105, "\u00a8": 106, "\u00a9": 107, "\u00aa": 108, "\u00ab": 109, "\u00ac": 110, "\u00ae": 111, "\u00af": 112, "\u00b0": 113, "\u00b1": 114, "\u00b2": 115, "\u00b3": 116, "\u00b4": 117, "\u00b5": 118, "\u00b6": 119, "\u00b7": 120, "\u00b8": 121, "\u00b9": 122, "\u00ba": 123, "\u00bb": 124, "\u00bc": 125, "\u00bd": 126, "\u00be": 127, "\u00bf": 128, "\u00c0": 129, "\u00c1": 130, "\u00c2": 131, "\u00c3": 132, "\u00c4": 133, "\u00c5": 134, "\u00c6": 135, "\u00c7": 136, "\u00c8": 137, "\u00c9": 138, "\u00ca": 139, "\u00cb": 140, "\u00cc": 141, "\u00cd": 142, "\u00ce": 143, "\u00cf": 144, "\u00d0": 145, "\u00d1": 146, "\u00d2": 147, "\u00d3": 148, "\u00d4": 149, "\u00d5": 150, "\u00d6": 151, "\u00d7": 152, "\u00d8": 153, "\u00d9": 154, "\u00da": 155, "\u00db": 156, "\u00dc": 157, "\u00dd": 158, "\u00de": 159, "\u00df": 160, "\u00e0": 161, "\u00e1": 162, "\u00e2": 163, "\u00e3": 164, "\u00e4": 165, "\u00e5": 166, "\u00e6": 167, "\u00e7": 168, "\u00e8": 169, "\u00e9": 170, "\u00ea": 171, "\u00eb": 172, "\u00ec": 173, "\u00ed": 174, "\u00ee": 175, "\u00ef": 176, "\u00f0": 177, "\u00f1": 178, "\u00f2": 179, "\u00f3": 180, "\u00f4": 181, "\u00f5": 182, "\u00f6": 183, "\u00f7": 184, "\u00f8": 185, "\u00f9": 186, "\u00fa": 187, "\u00fb": 188, "\u00fc": 189, "\u00fd": 190, "\u00fe": 191, "\u00ff": 192, "\u0100": 193, "\u0101": 194, "\u0102": 195, "\u0103": 196, "\u0104": 197, "\u0105": 198, "\u0106": 199, "\u0107": 200, "\u0108": 201, "\u0109": 202, "\u010a": 203, "\u010b": 204, "\u010c": 205, "\u010d": 206, "\u010e": 207, "\u010f": 208, "\u0110": 209, "\u0111": 210, "\u0112": 211, "\u0113": 212, "\u0114": 213, "\u0115": 214, "\u0116": 215, "\u0117": 216, "\u0118": 217, "\u0119": 218, "\u011a": 219, "\u011b": 220, "\u011c": 221, "\u011d": 222, "\u011e": 223, "\u011f": 224, "\u0120": 225, "\u0121": 226, "\u0122": 227, "\u0123": 228, "\u0124": 229, "\u0125": 230, "\u0126": 231, "\u0127": 232, "\u0128": 233, "\u0129": 234, "\u012a": 235, "\u012b": 236, "\u012c": 237, "\u012d": 238, "\u012e": 239, "\u012f": 240, "\u0130": 241, "\u0131": 242, "\u0132": 243, "\u0133": 244, "\u0134": 245, "\u0135": 246, "\u0136": 247, "\u0137": 248, "\u0138": 249, "\u0139": 250, "\u013a": 251, "\u013b": 252, "\u013c": 253, "\u013d": 254, "\u013e": 255, "\u013f": 256, "\u0140": 257, "\u0141": 258, "\u0142": 259, "\u0143": 260, "an": 261, "\u0120d": 262, "er": 263, "en": 264, "\u0120m": 265, "\u0120s": 266, "ang": 267, "ar": 268, "at": 269, "in": 270, "\u0120b": 271, "ak": 272, "\u0120p": 273, "\u0120k": 274, "ah": 275, "\u0120t": 276, "al": 277, "un": 278, "as": 279, "em": 280, "am": 281, "ya": 282, "\u0120di": 283, "\u0120y": 284, "eng": 285, "ad": 286, "\u0120yang": 287, "tu": 288, "kan": 289, "el": 290, "ap": 291, "\u0120se": 292, "is": 293, "ik": 294, "\u0120men": 295, "\u0120dan": 296, "\u0120l": 297, "\u0120ber": 298, "ga": 299, "il": 300, "it": 301, "es": 302, "nya": 303, "ing": 304, "\u0120h": 305, "\u0120mem": 306, "\u0120ter": 307, "\u0120j": 308, "eb": 309, "on": 310, "akan": 311, "us": 312, "\u0120in": 313, "ari": 314, "or": 315, "\u0120ke": 316, "engan": 317, "id"