147 lines
4.1 KiB
JSON
147 lines
4.1 KiB
JSON
{
|
||
"added_tokens_decoder": {
|
||
"151329": {
|
||
"content": "<|endoftext|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151330": {
|
||
"content": "[MASK]",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151331": {
|
||
"content": "[gMASK]",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151332": {
|
||
"content": "[sMASK]",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151333": {
|
||
"content": "<sop>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151334": {
|
||
"content": "<eop>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151335": {
|
||
"content": "<|system|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151336": {
|
||
"content": "<|user|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151337": {
|
||
"content": "<|assistant|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151338": {
|
||
"content": "<|observation|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151339": {
|
||
"content": "<|begin_of_image|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151340": {
|
||
"content": "<|end_of_image|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151341": {
|
||
"content": "<|begin_of_video|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
},
|
||
"151342": {
|
||
"content": "<|end_of_video|>",
|
||
"lstrip": false,
|
||
"normalized": false,
|
||
"rstrip": false,
|
||
"single_word": false,
|
||
"special": true
|
||
}
|
||
},
|
||
"additional_special_tokens": [
|
||
"<|endoftext|>",
|
||
"[MASK]",
|
||
"[gMASK]",
|
||
"[sMASK]",
|
||
"<sop>",
|
||
"<eop>",
|
||
"<|system|>",
|
||
"<|user|>",
|
||
"<|assistant|>",
|
||
"<|observation|>",
|
||
"<|begin_of_image|>",
|
||
"<|end_of_image|>",
|
||
"<|begin_of_video|>",
|
||
"<|end_of_video|>"
|
||
],
|
||
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
||
"clean_up_tokenization_spaces": false,
|
||
"do_lower_case": false,
|
||
"eos_token": "<|user|>",
|
||
"extra_special_tokens": {},
|
||
"model_input_names": [
|
||
"input_ids",
|
||
"attention_mask"
|
||
],
|
||
"model_max_length": 128000,
|
||
"pad_token": "<|endoftext|>",
|
||
"padding_side": "left",
|
||
"remove_space": false,
|
||
"tokenizer_class": "PreTrainedTokenizer"
|
||
}
|