147 lines
4.3 KiB
JSON
147 lines
4.3 KiB
JSON
|
|
{
|
|||
|
|
"added_tokens_decoder": {
|
|||
|
|
"151329": {
|
|||
|
|
"content": "<|endoftext|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151330": {
|
|||
|
|
"content": "[MASK]",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151331": {
|
|||
|
|
"content": "[gMASK]",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151332": {
|
|||
|
|
"content": "[sMASK]",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151333": {
|
|||
|
|
"content": "<sop>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151334": {
|
|||
|
|
"content": "<eop>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151335": {
|
|||
|
|
"content": "<|system|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151336": {
|
|||
|
|
"content": "<|user|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151337": {
|
|||
|
|
"content": "<|assistant|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151338": {
|
|||
|
|
"content": "<|observation|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151339": {
|
|||
|
|
"content": "<|begin_of_image|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151340": {
|
|||
|
|
"content": "<|end_of_image|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151341": {
|
|||
|
|
"content": "<|begin_of_video|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
},
|
|||
|
|
"151342": {
|
|||
|
|
"content": "<|end_of_video|>",
|
|||
|
|
"lstrip": false,
|
|||
|
|
"normalized": false,
|
|||
|
|
"rstrip": false,
|
|||
|
|
"single_word": false,
|
|||
|
|
"special": true
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"additional_special_tokens": [
|
|||
|
|
"<|endoftext|>",
|
|||
|
|
"[MASK]",
|
|||
|
|
"[gMASK]",
|
|||
|
|
"[sMASK]",
|
|||
|
|
"<sop>",
|
|||
|
|
"<eop>",
|
|||
|
|
"<|system|>",
|
|||
|
|
"<|user|>",
|
|||
|
|
"<|assistant|>",
|
|||
|
|
"<|observation|>",
|
|||
|
|
"<|begin_of_image|>",
|
|||
|
|
"<|end_of_image|>",
|
|||
|
|
"<|begin_of_video|>",
|
|||
|
|
"<|end_of_video|>"
|
|||
|
|
],
|
|||
|
|
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>你是一个名为 ChatGLM 的人工智能助手。你是基于智谱 AI 公司训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具\n\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set visible = content.split('</think>')[-1].strip() %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ visible }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ visible }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ visible }}{%- elif role == 'observation' %}<|observation|>\n{{ visible }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
|||
|
|
"clean_up_tokenization_spaces": false,
|
|||
|
|
"do_lower_case": false,
|
|||
|
|
"eos_token": "<|endoftext|>",
|
|||
|
|
"extra_special_tokens": {},
|
|||
|
|
"model_input_names": [
|
|||
|
|
"input_ids",
|
|||
|
|
"attention_mask"
|
|||
|
|
],
|
|||
|
|
"model_max_length": 128000,
|
|||
|
|
"pad_token": "<|endoftext|>",
|
|||
|
|
"padding_side": "left",
|
|||
|
|
"remove_space": false,
|
|||
|
|
"tokenizer_class": "PreTrainedTokenizer"
|
|||
|
|
}
|