From 0259592198889be59037c193af1ff18263177eb8 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 17 May 2026 22:14:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: open-unlearning/tofu_Llama-3.2-1B-Instruct_full Source: Original Platform --- .gitattributes | 36 + README.md | 3 + config.json | 40 + generation_config.json | 12 + model.safetensors | 3 + special_tokens_map.json | 17 + tokenizer.json | 3 + tokenizer_config.json | 2063 +++++++++++++++++++++++++++++++++++++++ trainer_state.json | 917 +++++++++++++++++ training_args.bin | 3 + 10 files changed, 3097 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..20c4753 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +--- +license: bsd-3-clause +--- diff --git a/config.json b/config.json new file mode 100644 index 0000000..fa2bc5c --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "meta-llama/Llama-3.2-1B-Instruct", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.1", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..c48a399 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.45.1" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..26d905c --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b0cd9f7db2735538ff78b4879fab61b23b7ca316df27b059585bf656700a8d +size 2471645608 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..b43be96 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..de2d513 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..897f826 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,917 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 625, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.04, + "grad_norm": 89.82371603949359, + "learning_rate": 4.0000000000000003e-07, + "loss": 2.6564, + "step": 5 + }, + { + "epoch": 0.08, + "grad_norm": 75.2735878207548, + "learning_rate": 8.000000000000001e-07, + "loss": 2.6478, + "step": 10 + }, + { + "epoch": 0.12, + "grad_norm": 33.58735952139897, + "learning_rate": 1.2000000000000002e-06, + "loss": 2.2785, + "step": 15 + }, + { + "epoch": 0.16, + "grad_norm": 14.171883076827761, + "learning_rate": 1.6000000000000001e-06, + "loss": 2.1944, + "step": 20 + }, + { + "epoch": 0.2, + "grad_norm": 14.61980793964136, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.0565, + "step": 25 + }, + { + "epoch": 0.24, + "grad_norm": 11.030361913924636, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.9451, + "step": 30 + }, + { + "epoch": 0.28, + "grad_norm": 11.483038196037421, + "learning_rate": 2.8000000000000003e-06, + "loss": 1.8853, + "step": 35 + }, + { + "epoch": 0.32, + "grad_norm": 10.282307478721446, + "learning_rate": 3.2000000000000003e-06, + "loss": 1.8899, + "step": 40 + }, + { + "epoch": 0.36, + "grad_norm": 9.403395764096778, + "learning_rate": 3.6000000000000003e-06, + "loss": 1.8369, + "step": 45 + }, + { + "epoch": 0.4, + "grad_norm": 8.39082411153001, + "learning_rate": 4.000000000000001e-06, + "loss": 1.8321, + "step": 50 + }, + { + "epoch": 0.44, + "grad_norm": 8.467837375829392, + "learning_rate": 4.4e-06, + "loss": 1.7815, + "step": 55 + }, + { + "epoch": 0.48, + "grad_norm": 7.619179269881611, + "learning_rate": 4.800000000000001e-06, + "loss": 1.7541, + "step": 60 + }, + { + "epoch": 0.52, + "grad_norm": 8.495247169424331, + "learning_rate": 5.2e-06, + "loss": 1.8144, + "step": 65 + }, + { + "epoch": 0.56, + "grad_norm": 7.812369921180664, + "learning_rate": 5.600000000000001e-06, + "loss": 1.7004, + "step": 70 + }, + { + "epoch": 0.6, + "grad_norm": 8.024623815868656, + "learning_rate": 6e-06, + "loss": 1.7253, + "step": 75 + }, + { + "epoch": 0.64, + "grad_norm": 7.360902183318722, + "learning_rate": 6.4000000000000006e-06, + "loss": 1.7047, + "step": 80 + }, + { + "epoch": 0.68, + "grad_norm": 8.562290112967787, + "learning_rate": 6.800000000000001e-06, + "loss": 1.6259, + "step": 85 + }, + { + "epoch": 0.72, + "grad_norm": 7.497861834873093, + "learning_rate": 7.2000000000000005e-06, + "loss": 1.6508, + "step": 90 + }, + { + "epoch": 0.76, + "grad_norm": 7.977323163225298, + "learning_rate": 7.600000000000001e-06, + "loss": 1.6438, + "step": 95 + }, + { + "epoch": 0.8, + "grad_norm": 8.076428643203684, + "learning_rate": 8.000000000000001e-06, + "loss": 1.6396, + "step": 100 + }, + { + "epoch": 0.84, + "grad_norm": 7.976089395817468, + "learning_rate": 8.400000000000001e-06, + "loss": 1.6967, + "step": 105 + }, + { + "epoch": 0.88, + "grad_norm": 7.0639566402763245, + "learning_rate": 8.8e-06, + "loss": 1.5554, + "step": 110 + }, + { + "epoch": 0.92, + "grad_norm": 7.731398632191519, + "learning_rate": 9.200000000000002e-06, + "loss": 1.5971, + "step": 115 + }, + { + "epoch": 0.96, + "grad_norm": 7.937712860421918, + "learning_rate": 9.600000000000001e-06, + "loss": 1.5833, + "step": 120 + }, + { + "epoch": 1.0, + "grad_norm": 7.563914418145805, + "learning_rate": 1e-05, + "loss": 1.555, + "step": 125 + }, + { + "epoch": 1.04, + "grad_norm": 7.803252722361364, + "learning_rate": 9.9e-06, + "loss": 1.3656, + "step": 130 + }, + { + "epoch": 1.08, + "grad_norm": 7.929626065845273, + "learning_rate": 9.800000000000001e-06, + "loss": 1.4725, + "step": 135 + }, + { + "epoch": 1.12, + "grad_norm": 7.161720074072072, + "learning_rate": 9.7e-06, + "loss": 1.4018, + "step": 140 + }, + { + "epoch": 1.16, + "grad_norm": 8.135421849383178, + "learning_rate": 9.600000000000001e-06, + "loss": 1.378, + "step": 145 + }, + { + "epoch": 1.2, + "grad_norm": 7.591888228234121, + "learning_rate": 9.5e-06, + "loss": 1.4169, + "step": 150 + }, + { + "epoch": 1.24, + "grad_norm": 7.506276067252539, + "learning_rate": 9.4e-06, + "loss": 1.3111, + "step": 155 + }, + { + "epoch": 1.28, + "grad_norm": 8.30322573720057, + "learning_rate": 9.3e-06, + "loss": 1.3481, + "step": 160 + }, + { + "epoch": 1.32, + "grad_norm": 8.49867887645396, + "learning_rate": 9.200000000000002e-06, + "loss": 1.3461, + "step": 165 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 8.401741468634247, + "learning_rate": 9.100000000000001e-06, + "loss": 1.3903, + "step": 170 + }, + { + "epoch": 1.4, + "grad_norm": 8.65770914891247, + "learning_rate": 9e-06, + "loss": 1.2654, + "step": 175 + }, + { + "epoch": 1.44, + "grad_norm": 8.500088292254128, + "learning_rate": 8.900000000000001e-06, + "loss": 1.3821, + "step": 180 + }, + { + "epoch": 1.48, + "grad_norm": 7.421515621477695, + "learning_rate": 8.8e-06, + "loss": 1.3662, + "step": 185 + }, + { + "epoch": 1.52, + "grad_norm": 8.190508815008455, + "learning_rate": 8.700000000000001e-06, + "loss": 1.3802, + "step": 190 + }, + { + "epoch": 1.56, + "grad_norm": 7.428504273817973, + "learning_rate": 8.6e-06, + "loss": 1.3208, + "step": 195 + }, + { + "epoch": 1.6, + "grad_norm": 8.293872163578715, + "learning_rate": 8.5e-06, + "loss": 1.3723, + "step": 200 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 7.668913906629593, + "learning_rate": 8.400000000000001e-06, + "loss": 1.3877, + "step": 205 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 7.2640670839071175, + "learning_rate": 8.3e-06, + "loss": 1.3366, + "step": 210 + }, + { + "epoch": 1.72, + "grad_norm": 8.45905985289491, + "learning_rate": 8.2e-06, + "loss": 1.3809, + "step": 215 + }, + { + "epoch": 1.76, + "grad_norm": 8.420327743224147, + "learning_rate": 8.1e-06, + "loss": 1.3381, + "step": 220 + }, + { + "epoch": 1.8, + "grad_norm": 7.091088067273984, + "learning_rate": 8.000000000000001e-06, + "loss": 1.3458, + "step": 225 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 8.157993701322475, + "learning_rate": 7.9e-06, + "loss": 1.3222, + "step": 230 + }, + { + "epoch": 1.88, + "grad_norm": 7.517398704427892, + "learning_rate": 7.800000000000002e-06, + "loss": 1.3658, + "step": 235 + }, + { + "epoch": 1.92, + "grad_norm": 8.142832872735468, + "learning_rate": 7.7e-06, + "loss": 1.3645, + "step": 240 + }, + { + "epoch": 1.96, + "grad_norm": 7.781387568953773, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3339, + "step": 245 + }, + { + "epoch": 2.0, + "grad_norm": 8.018401738016298, + "learning_rate": 7.500000000000001e-06, + "loss": 1.3626, + "step": 250 + }, + { + "epoch": 2.04, + "grad_norm": 7.178146988044731, + "learning_rate": 7.4e-06, + "loss": 0.9447, + "step": 255 + }, + { + "epoch": 2.08, + "grad_norm": 7.582541553977795, + "learning_rate": 7.3e-06, + "loss": 0.9022, + "step": 260 + }, + { + "epoch": 2.12, + "grad_norm": 9.803729014002805, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9051, + "step": 265 + }, + { + "epoch": 2.16, + "grad_norm": 9.447767686350764, + "learning_rate": 7.100000000000001e-06, + "loss": 0.8667, + "step": 270 + }, + { + "epoch": 2.2, + "grad_norm": 8.327471781348626, + "learning_rate": 7e-06, + "loss": 0.796, + "step": 275 + }, + { + "epoch": 2.24, + "grad_norm": 9.465641503438128, + "learning_rate": 6.9e-06, + "loss": 0.9401, + "step": 280 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 8.678391102462472, + "learning_rate": 6.800000000000001e-06, + "loss": 0.865, + "step": 285 + }, + { + "epoch": 2.32, + "grad_norm": 9.266451696995004, + "learning_rate": 6.700000000000001e-06, + "loss": 0.9266, + "step": 290 + }, + { + "epoch": 2.36, + "grad_norm": 10.225758737461298, + "learning_rate": 6.600000000000001e-06, + "loss": 0.907, + "step": 295 + }, + { + "epoch": 2.4, + "grad_norm": 8.465991958734747, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.9329, + "step": 300 + }, + { + "epoch": 2.44, + "grad_norm": 8.432762841049588, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.91, + "step": 305 + }, + { + "epoch": 2.48, + "grad_norm": 10.012927090762362, + "learning_rate": 6.300000000000001e-06, + "loss": 0.8835, + "step": 310 + }, + { + "epoch": 2.52, + "grad_norm": 8.405221169802761, + "learning_rate": 6.200000000000001e-06, + "loss": 0.8803, + "step": 315 + }, + { + "epoch": 2.56, + "grad_norm": 8.567685032288303, + "learning_rate": 6.1e-06, + "loss": 0.8581, + "step": 320 + }, + { + "epoch": 2.6, + "grad_norm": 9.655710893112909, + "learning_rate": 6e-06, + "loss": 0.8636, + "step": 325 + }, + { + "epoch": 2.64, + "grad_norm": 9.213194754159915, + "learning_rate": 5.9e-06, + "loss": 0.8703, + "step": 330 + }, + { + "epoch": 2.68, + "grad_norm": 8.394853800291447, + "learning_rate": 5.8e-06, + "loss": 0.8889, + "step": 335 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 8.25623902667668, + "learning_rate": 5.7e-06, + "loss": 0.9199, + "step": 340 + }, + { + "epoch": 2.76, + "grad_norm": 9.432213529061965, + "learning_rate": 5.600000000000001e-06, + "loss": 0.9354, + "step": 345 + }, + { + "epoch": 2.8, + "grad_norm": 9.331975998189465, + "learning_rate": 5.500000000000001e-06, + "loss": 0.8731, + "step": 350 + }, + { + "epoch": 2.84, + "grad_norm": 9.419112744932255, + "learning_rate": 5.400000000000001e-06, + "loss": 0.9096, + "step": 355 + }, + { + "epoch": 2.88, + "grad_norm": 9.044996816630446, + "learning_rate": 5.300000000000001e-06, + "loss": 0.8739, + "step": 360 + }, + { + "epoch": 2.92, + "grad_norm": 9.589663642804883, + "learning_rate": 5.2e-06, + "loss": 0.9271, + "step": 365 + }, + { + "epoch": 2.96, + "grad_norm": 9.193576698813617, + "learning_rate": 5.1e-06, + "loss": 0.8918, + "step": 370 + }, + { + "epoch": 3.0, + "grad_norm": 9.81010310005968, + "learning_rate": 5e-06, + "loss": 0.8967, + "step": 375 + }, + { + "epoch": 3.04, + "grad_norm": 8.019001290351486, + "learning_rate": 4.9000000000000005e-06, + "loss": 0.5329, + "step": 380 + }, + { + "epoch": 3.08, + "grad_norm": 10.84261876285756, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4985, + "step": 385 + }, + { + "epoch": 3.12, + "grad_norm": 9.387923983319462, + "learning_rate": 4.7e-06, + "loss": 0.468, + "step": 390 + }, + { + "epoch": 3.16, + "grad_norm": 8.269965182173046, + "learning_rate": 4.600000000000001e-06, + "loss": 0.4458, + "step": 395 + }, + { + "epoch": 3.2, + "grad_norm": 9.535893549281663, + "learning_rate": 4.5e-06, + "loss": 0.4821, + "step": 400 + }, + { + "epoch": 3.24, + "grad_norm": 9.401974393546057, + "learning_rate": 4.4e-06, + "loss": 0.472, + "step": 405 + }, + { + "epoch": 3.2800000000000002, + "grad_norm": 11.09046610543629, + "learning_rate": 4.3e-06, + "loss": 0.483, + "step": 410 + }, + { + "epoch": 3.32, + "grad_norm": 8.638663259816369, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.462, + "step": 415 + }, + { + "epoch": 3.36, + "grad_norm": 9.601865633584897, + "learning_rate": 4.1e-06, + "loss": 0.4518, + "step": 420 + }, + { + "epoch": 3.4, + "grad_norm": 9.288745640929617, + "learning_rate": 4.000000000000001e-06, + "loss": 0.465, + "step": 425 + }, + { + "epoch": 3.44, + "grad_norm": 9.868753828103186, + "learning_rate": 3.900000000000001e-06, + "loss": 0.4716, + "step": 430 + }, + { + "epoch": 3.48, + "grad_norm": 11.773719476313133, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.4535, + "step": 435 + }, + { + "epoch": 3.52, + "grad_norm": 10.591718001217169, + "learning_rate": 3.7e-06, + "loss": 0.4815, + "step": 440 + }, + { + "epoch": 3.56, + "grad_norm": 8.897682169461483, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.4505, + "step": 445 + }, + { + "epoch": 3.6, + "grad_norm": 9.563808712230394, + "learning_rate": 3.5e-06, + "loss": 0.46, + "step": 450 + }, + { + "epoch": 3.64, + "grad_norm": 9.828440492379743, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.4762, + "step": 455 + }, + { + "epoch": 3.68, + "grad_norm": 8.489390772662722, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.4984, + "step": 460 + }, + { + "epoch": 3.7199999999999998, + "grad_norm": 10.014450411847454, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.484, + "step": 465 + }, + { + "epoch": 3.76, + "grad_norm": 9.201375259300189, + "learning_rate": 3.1000000000000004e-06, + "loss": 0.4669, + "step": 470 + }, + { + "epoch": 3.8, + "grad_norm": 9.51157246451395, + "learning_rate": 3e-06, + "loss": 0.4643, + "step": 475 + }, + { + "epoch": 3.84, + "grad_norm": 9.62282368600112, + "learning_rate": 2.9e-06, + "loss": 0.4911, + "step": 480 + }, + { + "epoch": 3.88, + "grad_norm": 9.444343630230321, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.4856, + "step": 485 + }, + { + "epoch": 3.92, + "grad_norm": 10.047185908955532, + "learning_rate": 2.7000000000000004e-06, + "loss": 0.4907, + "step": 490 + }, + { + "epoch": 3.96, + "grad_norm": 10.121432836197208, + "learning_rate": 2.6e-06, + "loss": 0.4956, + "step": 495 + }, + { + "epoch": 4.0, + "grad_norm": 10.897990190318387, + "learning_rate": 2.5e-06, + "loss": 0.4784, + "step": 500 + }, + { + "epoch": 4.04, + "grad_norm": 7.43706714195581, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.235, + "step": 505 + }, + { + "epoch": 4.08, + "grad_norm": 10.809663179549249, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.2138, + "step": 510 + }, + { + "epoch": 4.12, + "grad_norm": 10.055114463726696, + "learning_rate": 2.2e-06, + "loss": 0.2159, + "step": 515 + }, + { + "epoch": 4.16, + "grad_norm": 10.338716351009188, + "learning_rate": 2.1000000000000002e-06, + "loss": 0.1913, + "step": 520 + }, + { + "epoch": 4.2, + "grad_norm": 8.793111787622328, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.207, + "step": 525 + }, + { + "epoch": 4.24, + "grad_norm": 8.159670512299746, + "learning_rate": 1.9000000000000002e-06, + "loss": 0.1985, + "step": 530 + }, + { + "epoch": 4.28, + "grad_norm": 8.482017636931404, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.1868, + "step": 535 + }, + { + "epoch": 4.32, + "grad_norm": 10.360178503982437, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.1945, + "step": 540 + }, + { + "epoch": 4.36, + "grad_norm": 7.875689134918156, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.2125, + "step": 545 + }, + { + "epoch": 4.4, + "grad_norm": 8.42269826376436, + "learning_rate": 1.5e-06, + "loss": 0.1994, + "step": 550 + }, + { + "epoch": 4.44, + "grad_norm": 8.05875558929426, + "learning_rate": 1.4000000000000001e-06, + "loss": 0.1751, + "step": 555 + }, + { + "epoch": 4.48, + "grad_norm": 9.480349080658375, + "learning_rate": 1.3e-06, + "loss": 0.2035, + "step": 560 + }, + { + "epoch": 4.52, + "grad_norm": 8.57557041236713, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.1908, + "step": 565 + }, + { + "epoch": 4.5600000000000005, + "grad_norm": 8.597528432511735, + "learning_rate": 1.1e-06, + "loss": 0.2119, + "step": 570 + }, + { + "epoch": 4.6, + "grad_norm": 8.009913537969426, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.1905, + "step": 575 + }, + { + "epoch": 4.64, + "grad_norm": 8.32445977305177, + "learning_rate": 9.000000000000001e-07, + "loss": 0.2243, + "step": 580 + }, + { + "epoch": 4.68, + "grad_norm": 8.993283223073181, + "learning_rate": 8.000000000000001e-07, + "loss": 0.2065, + "step": 585 + }, + { + "epoch": 4.72, + "grad_norm": 6.970573245330436, + "learning_rate": 7.000000000000001e-07, + "loss": 0.1958, + "step": 590 + }, + { + "epoch": 4.76, + "grad_norm": 7.645253401257893, + "learning_rate": 6.000000000000001e-07, + "loss": 0.2064, + "step": 595 + }, + { + "epoch": 4.8, + "grad_norm": 9.453343432891174, + "learning_rate": 5.000000000000001e-07, + "loss": 0.209, + "step": 600 + }, + { + "epoch": 4.84, + "grad_norm": 8.256722521448035, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.2309, + "step": 605 + }, + { + "epoch": 4.88, + "grad_norm": 8.523020194829563, + "learning_rate": 3.0000000000000004e-07, + "loss": 0.212, + "step": 610 + }, + { + "epoch": 4.92, + "grad_norm": 8.809891202943627, + "learning_rate": 2.0000000000000002e-07, + "loss": 0.1959, + "step": 615 + }, + { + "epoch": 4.96, + "grad_norm": 8.54338346414983, + "learning_rate": 1.0000000000000001e-07, + "loss": 0.1868, + "step": 620 + }, + { + "epoch": 5.0, + "grad_norm": 8.459641356527731, + "learning_rate": 0.0, + "loss": 0.1983, + "step": 625 + }, + { + "epoch": 5.0, + "step": 625, + "total_flos": 863416958976.0, + "train_loss": 0.9574172312736511, + "train_runtime": 745.662, + "train_samples_per_second": 26.822, + "train_steps_per_second": 0.838 + } + ], + "logging_steps": 5, + "max_steps": 625, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 863416958976.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..a3eb926 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c941af7cda405dc7296a44b1a51f96f90a196f9ee32a9ad6b56d18dafc0c94e4 +size 6904