Files
electra-small-nli-sts/tokenizer.json

1 line
484 KiB
JSON
Raw Normal View History

{"version":"1.0","truncation":{"max_length":512,"strategy":"LongestFirst","stride":0},"padding":{"strategy":"BatchLongest","direction":"Right","pad_to_multiple_of":null,"pad_id":0,"pad_type_id":0,"pad_token":"[PAD]"},"added_tokens":[{"id":0,"special":true,"content":"[PAD]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[CLS]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"[SEP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"[MASK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":{"type":"BertNormalizer","clean_text":true,"handle_chinese_chars":true,"strip_accents":null,"lowercase":false},"pre_tokenizer":{"type":"BertPreTokenizer"},"post_processor":{"type":"TemplateProcessing","single":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}}],"pair":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}},{"Sequence":{"id":"B","type_id":1}},{"SpecialToken":{"id":"[SEP]","type_id":1}}],"special_tokens":{"[CLS]":{"id":"[CLS]","ids":[2],"tokens":["[CLS]"]},"[SEP]":{"id":"[SEP]","ids":[3],"tokens":["[SEP]"]}}},"decoder":{"type":"WordPiece","prefix":"##","cleanup":true},"model":{"type":"WordPiece","unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"[PAD]":0,"[UNK]":1,"[CLS]":2,"[SEP]":3,"[MASK]":4,"!":5,"\"":6,"#":7,"$":8,"%":9,"&":10,"'":11,"(":12,")":13,"*":14,"+":15,",":16,"-":17,".":18,"/":19,"0":20,"1":21,"2":22,"3":23,"4":24,"5":25,"6":26,"7":27,"8":28,"9":29,":":30,";":31,"<":32,"=":33,">":34,"?":35,"@":36,"A":37,"B":38,"C":39,"D":40,"E":41,"F":42,"G":43,"H":44,"I":45,"J":46,"K":47,"L":48,"M":49,"N":50,"O":51,"P":52,"Q":53,"R":54,"S":55,"T":56,"U":57,"V":58,"W":59,"X":60,"Y":61,"Z":62,"[":63,"\\":64,"]":65,"^":66,"_":67,"`":68,"a":69,"b":70,"c":71,"d":72,"e":73,"f":74,"g":75,"h":76,"i":77,"j":78,"k":79,"l":80,"m":81,"n":82,"o":83,"p":84,"q":85,"r":86,"s":87,"t":88,"u":89,"v":90,"w":91,"x":92,"y":93,"z":94,"{":95,"|":96,"}":97,"~":98,"·":99,"":100,"ㄱ":101,"ㄲ":102,"ㄳ":103,"ㄴ":104,"ㄵ":105,"ㄶ":106,"ㄷ":107,"ㄸ":108,"ㄹ":109,"ㄺ":110,"ㄻ":111,"ㄼ":112,"ㄽ":113,"ㄾ":114,"ㄿ":115,"ㅀ":116,"ㅁ":117,"ㅂ":118,"ㅃ":119,"ㅄ":120,"ㅅ":121,"ㅆ":122,"ㅇ":123,"ㅈ":124,"ㅉ":125,"ㅊ":126,"ㅋ":127,"ㅌ":128,"ㅍ":129,"ㅎ":130,"ㅏ":131,"ㅐ":132,"ㅑ":133,"ㅒ":134,"ㅓ":135,"ㅔ":136,"ㅕ":137,"ㅖ":138,"ㅗ":139,"ㅘ":140,"ㅙ":141,"ㅚ":142,"ㅛ":143,"ㅜ":144,"ㅝ":145,"ㅞ":146,"ㅟ":147,"ㅠ":148,"ㅡ":149,"ㅢ":150,"ㅣ":151,"":152,"가":153,"각":154,"갂":155,"갃":156,"간":157,"갅":158,"갆":159,"갇":160,"갈":161,"갉":162,"갊":163,"갋":164,"갌":165,"갍":166,"갎":167,"갏":168,"감":169,"갑":170,"값":171,"갓":172,"갔":173,"강":174,"갖":175,"갗":176,"갘":177,"같":178,"갚":179,"갛":180,"개":181,"객":182,"갞":183,"갠":184,"갢":185,"갣":186,"갤":187,"갥":188,"갨":189,"갬":190,"갭":191,"갮":192,"갯":193,"갰":194,"갱":195,"갴":196,"갵":197,"갶":198,"갷":199,"갸":200,"갹":201,"갺":202,"갻":203,"갼":204,"갽":205,"갿":206,"걀":207,"걄":208,"걈":209,"걉":210,"걊":211,"걋":212,"걌":213,"걍":214,"걐":215,"걑":216,"걔":217,"걕":218,"걖":219,"걘":220,"걜":221,"걠":222,"걥":223,"걧":224,"걩":225,"거":226,"걱":227,"걲":228,"걳":229,"건":230,"걵":231,"걷":232,"걸":233,"걹":234,"걺":235,"걼":236,"걽":237,"걿":238,"검":239,"겁":240,"겂":241,"것":242,"겄":243,"겅":244,"겆":245,"겇":246,"겈":247,"겉":248,"겊":249,"겋":250,"게":251,"겍":252,"겐":253,"겓":254,"겔":255,"겕":256,"겖":257,"겜":258,"겝":259,"겟":260,"겠":261,"겡":262,"겢":263,"겣":264,"겤":265,"겥":266,"겦":267,"겧":268,"겨":269,"격":270,"겪":271,"