From 3e5b593c5d910d583f3f95bc103df883196be4e6 Mon Sep 17 00:00:00 2001 From: ai-modelscope Date: Sat, 25 Jan 2025 06:06:55 +0800 Subject: [PATCH] Update README.md --- .gitattributes | 1 - chat_template.json | 3 +++ tokenizer_config.json | 25 +++++-------------------- 3 files changed, 8 insertions(+), 21 deletions(-) create mode 100644 chat_template.json diff --git a/.gitattributes b/.gitattributes index 2ef8cfb..a6344aa 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text -pytorch_model.bin filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.json b/chat_template.json new file mode 100644 index 0000000..732bd68 --- /dev/null +++ b/chat_template.json @@ -0,0 +1,3 @@ +{ + "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}" +} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json index 4e8bf33..01b36bc 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -114,31 +114,16 @@ "special": true } }, - "additional_special_tokens": [ - "<|im_start|>", - "<|im_end|>", - "<|object_ref_start|>", - "<|object_ref_end|>", - "<|box_start|>", - "<|box_end|>", - "<|quad_start|>", - "<|quad_end|>", - "<|vision_start|>", - "<|vision_end|>", - "<|vision_pad|>", - "<|image_pad|>", - "<|video_pad|>" - ], + "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|object_ref_start|>","<|object_ref_end|>","<|box_start|>","<|box_end|>","<|quad_start|>","<|quad_end|>","<|vision_start|>","<|vision_end|>","<|vision_pad|>","<|image_pad|>","<|video_pad|>"], "bos_token": null, "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}", "clean_up_tokenization_spaces": false, "eos_token": "<|im_end|>", + "padding_side": "left", "errors": "replace", - "model_max_length": 4096, - "pad_token": null, - "padding_side": "right", - "processor_class": "Qwen2VLProcessor", + "model_max_length": 32768, + "pad_token": "<|endoftext|>", "split_special_tokens": false, "tokenizer_class": "Qwen2Tokenizer", "unk_token": null -} +} \ No newline at end of file