From 06bd938075968adc98bc4080bfcd65a8c2a25250 Mon Sep 17 00:00:00 2001 From: Reading Comprehension Group of HFL Date: Wed, 29 May 2024 05:14:31 +0000 Subject: [PATCH] update config file to sync meta's recent changes --- config.json | 4 ++-- special_tokens_map.json | 23 ++--------------------- tokenizer_config.json | 5 ++--- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/config.json b/config.json index a9978f0..31553d6 100644 --- a/config.json +++ b/config.json @@ -5,7 +5,7 @@ "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 128000, - "eos_token_id": 128001, + "eos_token_id": 128009, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, @@ -24,4 +24,4 @@ "transformers_version": "4.40.0.dev0", "use_cache": true, "vocab_size": 128256 -} \ No newline at end of file +} diff --git a/special_tokens_map.json b/special_tokens_map.json index e5b39b6..d8cd507 100644 --- a/special_tokens_map.json +++ b/special_tokens_map.json @@ -1,23 +1,4 @@ { - "bos_token": { - "content": "<|begin_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": { - "content": "<|end_of_text|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>" } diff --git a/tokenizer_config.json b/tokenizer_config.json index dec8354..1bfd114 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -2050,14 +2050,13 @@ } }, "bos_token": "<|begin_of_text|>", - "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", "clean_up_tokenization_spaces": true, - "eos_token": "<|end_of_text|>", + "eos_token": "<|eot_id|>", "model_input_names": [ "input_ids", "attention_mask" ], "model_max_length": 1000000000000000019884624838656, - "pad_token": "<|end_of_text|>", "tokenizer_class": "PreTrainedTokenizerFast" }