Files
gpt-neox-20b/vocab.json

1 line
1.0 MiB
JSON
Raw Permalink Normal View History

{"<|endoftext|>": 0, "<|padding|>": 1, "!": 2, "\"": 3, "#": 4, "$": 5, "%": 6, "&": 7, "'": 8, "(": 9, ")": 10, "*": 11, "+": 12, ",": 13, "-": 14, ".": 15, "/": 16, "0": 17, "1": 18, "2": 19, "3": 20, "4": 21, "5": 22, "6": 23, "7": 24, "8": 25, "9": 26, ":": 27, ";": 28, "<": 29, "=": 30, ">": 31, "?": 32, "@": 33, "A": 34, "B": 35, "C": 36, "D": 37, "E": 38, "F": 39, "G": 40, "H": 41, "I": 42, "J": 43, "K": 44, "L": 45, "M": 46, "N": 47, "O": 48, "P": 49, "Q": 50, "R": 51, "S": 52, "T": 53, "U": 54, "V": 55, "W": 56, "X": 57, "Y": 58, "Z": 59, "[": 60, "\\": 61, "]": 62, "^": 63, "_": 64, "`": 65, "a": 66, "b": 67, "c": 68, "d": 69, "e": 70, "f": 71, "g": 72, "h": 73, "i": 74, "j": 75, "k": 76, "l": 77, "m": 78, "n": 79, "o": 80, "p": 81, "q": 82, "r": 83, "s": 84, "t": 85, "u": 86, "v": 87, "w": 88, "x": 89, "y": 90, "z": 91, "{": 92, "|": 93, "}": 94, "~": 95, "\u00a1": 96, "\u00a2": 97, "\u00a3": 98, "\u00a4": 99, "\u00a5": 100, "\u00a6": 101, "\u00a7": 102, "\u00a8": 103, "\u00a9": 104, "\u00aa": 105, "\u00ab": 106, "\u00ac": 107, "\u00ae": 108, "\u00af": 109, "\u00b0": 110, "\u00b1": 111, "\u00b2": 112, "\u00b3": 113, "\u00b4": 114, "\u00b5": 115, "\u00b6": 116, "\u00b7": 117, "\u00b8": 118, "\u00b9": 119, "\u00ba": 120, "\u00bb": 121, "\u00bc": 122, "\u00bd": 123, "\u00be": 124, "\u00bf": 125, "\u00c2": 126, "\u00c3": 127, "\u00c4": 128, "\u00c5": 129, "\u00c6": 130, "\u00c7": 131, "\u00c8": 132, "\u00c9": 133, "\u00ca": 134, "\u00cb": 135, "\u00cc": 136, "\u00cd": 137, "\u00ce": 138, "\u00cf": 139, "\u00d0": 140, "\u00d1": 141, "\u00d2": 142, "\u00d3": 143, "\u00d4": 144, "\u00d5": 145, "\u00d6": 146, "\u00d7": 147, "\u00d8": 148, "\u00d9": 149, "\u00da": 150, "\u00db": 151, "\u00dc": 152, "\u00dd": 153, "\u00de": 154, "\u00df": 155, "\u00e0": 156, "\u00e1": 157, "\u00e2": 158, "\u00e3": 159, "\u00e4": 160, "\u00e5": 161, "\u00e6": 162, "\u00e7": 163, "\u00e8": 164, "\u00e9": 165, "\u00ea": 166, "\u00eb": 167, "\u00ec": 168, "\u00ed": 169, "\u00ee": 170, "\u00ef": 171, "\u00f0": 172, "\u00f1": 173, "\u00f2": 174, "\u00f3": 175, "\u00f4": 176, "\u0100": 177, "\u0101": 178, "\u0102": 179, "\u0103": 180, "\u0104": 181, "\u0105": 182, "\u0106": 183, "\u0107": 184, "\u0108": 185, "\u0109": 186, "\u010a": 187, "\u010b": 188, "\u010c": 189, "\u010d": 190, "\u010e": 191, "\u010f": 192, "\u0110": 193, "\u0111": 194, "\u0112": 195, "\u0113": 196, "\u0114": 197, "\u0115": 198, "\u0116": 199, "\u0117": 200, "\u0118": 201, "\u0119": 202, "\u011a": 203, "\u011b": 204, "\u011c": 205, "\u011d": 206, "\u011e": 207, "\u011f": 208, "\u0120": 209, "\u0121": 210, "\u0122": 211, "\u0123": 212, "\u0124": 213, "\u0125": 214, "\u0126": 215, "\u0127": 216, "\u0128": 217, "\u0129": 218, "\u012a": 219, "\u012b": 220, "\u012c": 221, "\u012d": 222, "\u012e": 223, "\u012f": 224, "\u0130": 225, "\u0131": 226, "\u0132": 227, "\u0133": 228, "\u0134": 229, "\u0135": 230, "\u0136": 231, "\u0137": 232, "\u0138": 233, "\u0139": 234, "\u013a": 235, "\u013b": 236, "\u013c": 237, "\u013d": 238, "\u013e": 239, "\u013f": 240, "\u0140": 241, "\u0141": 242, "\u0142": 243, "\u0143": 244, "\u0120\u0120": 245, "\u0120t": 246, "\u0120a": 247, "he": 248, "in": 249, "re": 250, "on": 251, "\u0120\u0120\u0120\u0120": 252, "\u0120the": 253, "er": 254, "at": 255, "\u0120s": 256, "en": 257, "\u0120o": 258, "\u0120w": 259, "\u0120c": 260, "is": 261, "it": 262, "or": 263, "ed": 264, "es": 265, "an": 266, "al": 267, "\u0120p": 268, "\u0120f": 269, "\u0120b": 270, "\u0120an": 271, "ing": 272, "\u0120of": 273, "ar": 274, "\u0120in": 275, "ou": 276, "\u0120d": 277, "\u0120m": 278, "ion": 279, "ic": 280, "\u0120to": 281, "le": 282, "--": 283, "as": 284, "\u0120and": 285, "\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120": 286, "ro": 287, "\u0120h": 288, "\u0120th": 289, "ent": 290, "ct": 291, "et": 292, "el": 293, "\u0120re": 294, "\u0120n": 295, "st": 296, "om": 297, "\u0120l": 298, "\u0120e": 299, "il": 300, "id": 301, "ot": 302, "im": 303, "ig": 304, "\u0120g": 305, "ve": 306, "ut": 307, "\u0120T": 308, "\u0120I": 309, "\u0120is": 310, "ol": 311, "am": 312, "\u0120(":