Files
klue-roberta-small-nli-sts/tokenizer.json

1 line
483 KiB
JSON
Raw Permalink Normal View History

{"version":"1.0","truncation":{"max_length":512,"strategy":"LongestFirst","stride":0},"padding":{"strategy":"BatchLongest","direction":"Right","pad_to_multiple_of":null,"pad_id":1,"pad_type_id":0,"pad_token":"[PAD]"},"added_tokens":[{"id":0,"special":true,"content":"[CLS]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[PAD]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SEP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":3,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":4,"special":true,"content":"[MASK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":{"type":"BertNormalizer","clean_text":true,"handle_chinese_chars":true,"strip_accents":null,"lowercase":false},"pre_tokenizer":{"type":"BertPreTokenizer"},"post_processor":{"type":"TemplateProcessing","single":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}}],"pair":[{"SpecialToken":{"id":"[CLS]","type_id":0}},{"Sequence":{"id":"A","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}},{"Sequence":{"id":"B","type_id":0}},{"SpecialToken":{"id":"[SEP]","type_id":0}}],"special_tokens":{"[CLS]":{"id":"[CLS]","ids":[0],"tokens":["[CLS]"]},"[SEP]":{"id":"[SEP]","ids":[2],"tokens":["[SEP]"]}}},"decoder":{"type":"WordPiece","prefix":"##","cleanup":true},"model":{"type":"WordPiece","unk_token":"[UNK]","continuing_subword_prefix":"##","max_input_chars_per_word":100,"vocab":{"[CLS]":0,"[PAD]":1,"[SEP]":2,"[UNK]":3,"[MASK]":4,"!":5,"\"":6,"#":7,"$":8,"%":9,"&":10,"'":11,"(":12,")":13,"*":14,"+":15,",":16,"-":17,".":18,"/":19,"0":20,"1":21,"2":22,"3":23,"4":24,"5":25,"6":26,"7":27,"8":28,"9":29,":":30,";":31,"<":32,"=":33,">":34,"?":35,"@":36,"A":37,"B":38,"C":39,"D":40,"E":41,"F":42,"G":43,"H":44,"I":45,"J":46,"K":47,"L":48,"M":49,"N":50,"O":51,"P":52,"Q":53,"R":54,"S":55,"T":56,"U":57,"V":58,"W":59,"X":60,"Y":61,"Z":62,"[":63,"]":64,"^":65,"_":66,"`":67,"a":68,"b":69,"c":70,"d":71,"e":72,"f":73,"g":74,"h":75,"i":76,"j":77,"k":78,"l":79,"m":80,"n":81,"o":82,"p":83,"q":84,"r":85,"s":86,"t":87,"u":88,"v":89,"w":90,"x":91,"y":92,"z":93,"{":94,"|":95,"}":96,"~":97,"²":98,"´":99,"·":100,"×":101,"é":102,"а":103,"е":104,"и":105,"н":106,"о":107,"р":108,"с":109,"т":110,"ᆞ":111,"—":112,"―":113,"":114,"":115,"“":116,"”":117,"•":118,"":119,"‥":120,"…":121,"‧":122,"※":123,"℃":124,"":125,"":126,"Ⅱ":127,"Ⅲ":128,"↑":129,"→":130,"∙":131,"":132,"≪":133,"≫":134,"⊙":135,"①":136,"②":137,"③":138,"④":139,"⑤":140,"⑥":141,"⑦":142,"─":143,"━":144,"│":145,"■":146,"□":147,"▲":148,"△":149,"▶":150,"▷":151,"▼":152,"◆":153,"◇":154,"◈":155,"○":156,"●":157,"★":158,"☆":159,"☞":160,"♀":161,"♡":162,"♥":163,"✔":164,"✨":165,"❤":166,"":167,"〈":168,"〉":169,"《":170,"》":171,"「":172,"」":173,"『":174,"』":175,"【":176,"】":177,"":178,"":179,"の":180,"・":181,"ㄱ":182,"ㄴ":183,"ㄷ":184,"ㄹ":185,"ㅁ":186,"ㅂ":187,"ㅅ":188,"ㅇ":189,"ㅈ":190,"ㅋ":191,"ㅌ":192,"ㅎ":193,"ㅏ":194,"ㅜ":195,"ㅠ":196,"ㅡ":197,"ㅣ":198,"":199,"ㆍ":200,"㈜":201,"㎎":202,"㎏":203,"㎖":204,"㎜":205,"㎝":206,"㎞":207,"㎡":208,"㎢":209,"㎥":210,"㎾":211,"㏊":212,"一":213,"七":214,"三":215,"上":216,"下":217,"不":218,"世":219,"中":220,"主":221,"之":222,"九":223,"也":224,"事":225,"二":226,"五":227,"京":228,"人":229,"仁":230,"代":231,"以":232,"任":233,"佛":234,"作":235,"使":236,"來":237,"例":238,"保":239,"信":240,"修":241,"傳":242,"債":243,"元":244,"先":245,"光":246,"入":247,"內":248,"全":249,"八":250,"公":251,"共":252,"兵":253,"其":254,"典":255,"出":256,"分":257,"判":258,"別":259,"利":260,"制":261,"則":262,"前":263,"力":264,"動":265,"務":266,"化":267,"北":268,"十":269,"南":270,"原":271,"反":272,"受"